## MatplotLib
- A python package for data visualization
- https://matplotlib.org/stable/index.html

In [None]:
import matplotlib.colors as normCol       # Allows us to normalize colors later
import matplotlib.pyplot as plt
from sklearn import datasets #This is for the dataset
import numpy as np # This is for data manipulation

Matplotlib style
Add some nilearn at the end

Packages:
- normCol: Allows us to normalize colors to the dataset.
- plt: This is the primary matplotlib tool.
- datasets: The sklearn package contains many useful machine learning tools and also comes with some handy datasets. Here we import only the datasets module to get some test data.
- np: Numpy is heavily integrated in matplotlib and is a very handy data manipulation tool.

# Here we can see all the editable attributes of a matplotlib figure

![image.png](attachment:e986f18a-e091-4207-b364-edf0fda12282.png)

We will use this sample data to generate a simple scatterplot.

In [None]:
# Sample data: six x positions and, for each one, a y value twice as large.
X = list(range(6))
Y = [2 * value for value in X]

In [None]:
# Draw the sample points, then render the figure.
# The parentheses matter: a bare `plt.show` only names the function and
# never calls it.
plt.scatter(X, Y)
plt.show()

# Although relatively bare, extensive customization options exist.

In [None]:
# One named color per point, drawn as half-transparent diamond markers.
colors = ['green', 'red', 'blue', 'orange', 'hotpink', 'purple']
plt.scatter(
    X,
    Y,
    marker='d',
    color=colors,
    alpha=0.5,
)

In [None]:
# Color each point by a numeric value mapped through the colormap.
# The normalization range is taken from the values themselves so that the
# smallest and largest value land on the two ends of the colormap; a fixed
# 0-5 range would leave the value 6 outside the normalized range.
color_values = [1, 2, 3, 4, 5, 6]
normalize = normCol.Normalize(vmin=min(color_values), vmax=max(color_values))
plt.style.use('bmh')
plt.scatter(X, Y, s=100, marker='d', c=color_values, norm=normalize)

## From here we will be working primarily with the iris dataset.

Here is a link if you want to learn more about the dataset:
https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html

In [None]:
# Load the bundled iris dataset and list its species and measurement names.
iris = datasets.load_iris()
for field in ("target_names", "feature_names"):
    print(iris[field])

## The code above tells us that there are 3 different iris species and each iris had 4 measurements.

In [None]:
# Measurement array and the matching species-code array from the dataset.
Select_iris = iris["data"]
Target_iris = iris["target"]
# Number of distinct species names in the dataset.
classes = len(iris.target_names)

## Much like other forms of programming, there are multiple paths to the same end.
First let's try a more code-intensive approach.

In [None]:
# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in newer matplotlib
# releases; use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# One scatter call per species so each gets its own color and legend entry.
for i in range(classes):
    # Boolean mask selecting the rows whose species code is i. Indexing with
    # the mask keeps the selected column one-dimensional.
    index = Target_iris == i
    plt.scatter(Select_iris[index, 2], Select_iris[index, 3], label=iris.target_names[i])
plt.legend()
plt.xlabel(iris.feature_names[2])
plt.ylabel(iris.feature_names[3])
plt.grid(linestyle="--", linewidth=0.5)
plt.show()

While the above code is easily reproducible, it does limit our freedom for customization. Let's try a more liberating approach.

To start we split the iris dataset into three different groups by their species.

In [None]:
# Build one table with the species code in column 0 followed by the
# measurement columns. Reshaping yields the (n, 1) code column directly,
# instead of cutting the target array into n one-element pieces.
newarr = np.reshape(Target_iris, (-1, 1))
NetIris = np.hstack((newarr, Select_iris))

# One sub-table per species, selected by the code stored in column 0.
iris1 = NetIris[NetIris[:, 0] == 0]
iris2 = NetIris[NetIris[:, 0] == 1]
iris3 = NetIris[NetIris[:, 0] == 2]

In [None]:
# Scatter plot with one call per species, so each can be styled on its own.
# Column 0 of every group holds the species code, which is why the
# measurements with feature indices 2 and 3 sit in columns 3 and 4.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(iris1[:, 3], iris1[:, 4], label=iris.target_names[0], marker='d')
ax1.scatter(iris2[:, 3], iris2[:, 4], label=iris.target_names[1], color='purple')
ax1.scatter(iris3[:, 3], iris3[:, 4], label=iris.target_names[2])
plt.legend()
plt.xlabel(iris.feature_names[2])
plt.ylabel(iris.feature_names[3])
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()


## As we can see, although more code may be present, it can be easier to follow and allows for additional customization options. Still, we can improve this further.

Until now we would have had to change every reference to the iris variables. How does the code below increase reproducibility?

In [None]:
# Build one table with the species code in column 0 followed by the
# measurement columns. Reshaping yields the (n, 1) code column directly,
# instead of cutting the target array into n one-element pieces.
newarr = np.reshape(Target_iris, (-1, 1))
NetIris = np.hstack((newarr, Select_iris))

# One sub-table per species, selected by the code stored in column 0.
iris1 = NetIris[NetIris[:, 0] == 0]
iris2 = NetIris[NetIris[:, 0] == 1]
iris3 = NetIris[NetIris[:, 0] == 2]

# Columns of NetIris to plot. Change only these two numbers to plot a
# different pair of measurements.
x = 3
y = 4

x1 = iris1[:, x]
y1 = iris1[:, y]

x2 = iris2[:, x]
y2 = iris2[:, y]

x3 = iris3[:, x]
y3 = iris3[:, y]

# NetIris has the species code in front, so column k corresponds to
# feature name k - 1.
xName = iris.feature_names[x - 1]
yName = iris.feature_names[y - 1]

In [None]:
# Same scatter plot as before, now driven by the pre-selected columns and
# axis names instead of hard-coded indices.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, label=iris.target_names[0], marker='d')
ax1.scatter(x2, y2, label=iris.target_names[1], color='purple')
ax1.scatter(x3, y3, label=iris.target_names[2])
plt.legend()
plt.xlabel(xName)
plt.ylabel(yName)
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()


# How does the above code improve reproducibility?

In [None]:
# Scatter plot where the marker size scales with column 2 of each species
# table (the measurement with feature index 1) and every species gets its
# own opacity.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, s=iris1[:, 2] * 50, alpha=0.25, label=iris.target_names[0])
ax1.scatter(x2, y2, s=iris2[:, 2] * 50, alpha=0.5, label=iris.target_names[1])
ax1.scatter(x3, y3, s=iris3[:, 2] * 50, alpha=0.75, label=iris.target_names[2])
plt.legend()
plt.xlabel(xName)
plt.ylabel(yName)
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()

# Exercises (1):

## 1.) Change the color of one cluster and the shape of another.

In [None]:
# Exercise starting point: one scatter call per species.
# Add `color=` / `marker=` arguments to the calls below.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, label=iris.target_names[0])
ax1.scatter(x2, y2, label=iris.target_names[1])
ax1.scatter(x3, y3, label=iris.target_names[2])
plt.legend()
plt.xlabel(xName)
plt.ylabel(yName)
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()

## 2.) Remove the versicolor cluster from the graph.

In [None]:
# Exercise starting point: one scatter call per species.
# Each call draws exactly one cluster.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, label=iris.target_names[0])
ax1.scatter(x2, y2, label=iris.target_names[1])
ax1.scatter(x3, y3, label=iris.target_names[2])
plt.legend()
plt.xlabel(xName)
plt.ylabel(yName)
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()

## 3.) Play around with the parameters in the next cell. Change colors, size, opacity, and/or variables. (You can change almost anything in the graph, so feel free to explore.)

In [None]:
# Exercise starting point: a plain scatter plot of the three species.
# Every argument below is open for experimentation.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, label=iris.target_names[0])
ax1.scatter(x2, y2, label=iris.target_names[1])
ax1.scatter(x3, y3, label=iris.target_names[2])
plt.legend()
plt.xlabel(xName)
plt.ylabel(yName)
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()

# The matplotlib tools come with a wide variety of graphs to represent data.

In [None]:
# Horizontal violin plot of the selected x measurement, one violin per species.
fig, ax = plt.subplots()
# Row at which each species' violin is drawn (x1 at 3, x2 at 2, x3 at 1).
# The ticks are placed at these same positions before the labels are set, so
# every species name ends up next to its own violin.
positions = [3, 2, 1]
ax.set_yticks(positions)
ax.set_yticklabels(iris.target_names)
ax.set_xlabel(xName)
ax.violinplot([x1, x2, x3], positions=positions, vert=False, showmeans=True)




# The functionality of matplotlib extends beyond only representing data and can integrate a variety of analyses too.

For example, we can easily display the means of each measure for all three types of iris.

In [None]:
# Measurement columns with feature indices 2 and 3, plus their axis labels.
xLabel, yLabel = iris.feature_names[2], iris.feature_names[3]
x, y = Select_iris[:, 2], Select_iris[:, 3]

In [None]:
# Get averages
averages = [Select_iris[Target_iris == i].mean(axis=0) for i in range (classes)]

# Arange data
x = np.arange(len(iris.feature_names))

# Generate bar char
fig = plt.figure(figsize = (10,10))
ax = fig.add_subplot()
bar1 = ax.bar(x - 0.25, averages[0], 0.25, label = iris.target_names[0])
bar2 = ax.bar(x, averages[1], 0.25, label = iris.target_names[1])
bar3 = ax.bar(x + 0.25, averages[2], 0., label = iris.target_names[2])
ax.set_xticks(x)
ax.set_xticklabels(iris.feature_names)
plt.legend()
plt.title("Bar Chart Iris Averages")
plt.ylabel("Average")
plt.show

## When combined with linear regression, we can add the line of best fit to the data.

In [None]:
# Scatter plot of the three species with a least-squares line through each.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, label=iris.target_names[0])
ax1.scatter(x2, y2, label=iris.target_names[1])
ax1.scatter(x3, y3, label=iris.target_names[2])
plt.legend()
plt.xlabel(xLabel)
plt.ylabel(yLabel)
plt.grid(linestyle="--", linewidth=0.5)

# Degree-1 fit per species: polyfit returns (slope, intercept).
m1, b1 = np.polyfit(x1, y1, 1)
m2, b2 = np.polyfit(x2, y2, 1)
m3, b3 = np.polyfit(x3, y3, 1)

# Evaluate each line over the same x values it was fitted on, so the line
# always matches the plotted points.
plt.plot(x1, b1 + m1 * x1, color='red')
plt.plot(x2, b2 + m2 * x2, color='red')
plt.plot(x3, b3 + m3 * x3, color='red')

# Call show(); a bare `plt.show` does nothing.
plt.show()


We can further extend these lines by plotting between the minimum and maximum x values from the datasets.

In [None]:
# Scatter plot of the three species with each regression line drawn across
# the full x range of the combined data.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, label=iris.target_names[0])
ax1.scatter(x2, y2, label=iris.target_names[1])
ax1.scatter(x3, y3, label=iris.target_names[2])
plt.legend()
plt.xlabel(xLabel)
plt.ylabel(yLabel)
plt.grid(linestyle="--", linewidth=0.5)

# Degree-1 least-squares coefficients per species.
poly1 = np.polyfit(x1, y1, 1)
poly2 = np.polyfit(x2, y2, 1)
poly3 = np.polyfit(x3, y3, 1)

# Wrap the coefficients so each fit can be called like a function of x.
est1 = np.poly1d(poly1)
est2 = np.poly1d(poly2)
est3 = np.poly1d(poly3)

# Smallest and largest x value over all three species.
irisCompare = np.concatenate((x1, x2, x3))
xmin = np.min(irisCompare)
xmax = np.max(irisCompare)
print(xmin)
print(xmax)

# A straight line only needs its two end points.
plt.plot((xmin, xmax), (est1(xmin), est1(xmax)), color='blue')
plt.plot((xmin, xmax), (est2(xmin), est2(xmax)), color='orange')
plt.plot((xmin, xmax), (est3(xmin), est3(xmax)), color='green')

# Call show(); a bare `plt.show` does nothing.
plt.show()


In [None]:
from sklearn.cluster import KMeans

# Fit a separate one-cluster model to each species table.
kmeans1, kmeans2, kmeans3 = [
    KMeans(n_clusters=1).fit(group) for group in (iris1, iris2, iris3)
]
for model in (kmeans1, kmeans2, kmeans3):
    print(model.cluster_centers_)

# Columns 3 and 4 of each single cluster center give the coordinates that
# are plotted on the x and y axes.
cen_x = [model.cluster_centers_[0, 3] for model in (kmeans1, kmeans2, kmeans3)]
cen_y = [model.cluster_centers_[0, 4] for model in (kmeans1, kmeans2, kmeans3)]

In [None]:
# Faded scatter plot of the three species with their cluster centers drawn
# on top as red triangles.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, s=iris1[:, 2] * 50, alpha=0.2, label=iris.target_names[0])
ax1.scatter(x2, y2, s=iris2[:, 2] * 50, alpha=0.2, label=iris.target_names[1])
ax1.scatter(x3, y3, s=iris3[:, 2] * 50, alpha=0.2, label=iris.target_names[2])
ax1.scatter(cen_x, cen_y, c='red', marker='^', s=100)
plt.legend()
plt.xlabel(xLabel)
plt.ylabel(yLabel)
plt.grid(linestyle="--", linewidth=0.5)
# Call show(); a bare `plt.show` does nothing.
plt.show()

In [None]:
# Faded scatter plot of the three species, with a line from every point to
# the cluster center of its species.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x1, y1, s=iris1[:, 2] * 50, alpha=0.2, label=iris.target_names[0])
ax1.scatter(x2, y2, s=iris2[:, 2] * 50, alpha=0.2, label=iris.target_names[1])
ax1.scatter(x3, y3, s=iris3[:, 2] * 50, alpha=0.2, label=iris.target_names[2])
plt.legend()
plt.xlabel(xLabel)
plt.ylabel(yLabel)
plt.grid(linestyle="--", linewidth=0.5)

# (x values, y values, line color) per species, in the same order as the
# entries of cen_x / cen_y.
spoke_groups = (
    (x1, y1, 'blue'),
    (x2, y2, 'orange'),
    (x3, y3, 'green'),
)
for k, (xs, ys, line_color) in enumerate(spoke_groups):
    # Loop-local names are used so the notebook-level `x` and `y` are not
    # overwritten by this cell.
    for px, py in zip(xs, ys):
        plt.plot((px, cen_x[k]), (py, cen_y[k]), c=line_color, alpha=.6)

# Draw the centers last so they sit on top of the lines.
ax1.scatter(cen_x, cen_y, c='red', marker='^', s=100)
# Call show(); a bare `plt.show` does nothing.
plt.show()

In [None]:
# Scatter plot of the third species only, with a line from every point to
# that species' cluster center.
fig = plt.figure()
ax1 = fig.add_subplot()
ax1.scatter(x3, y3, alpha=0.6, label=iris.target_names[2], c='green')
plt.legend()
plt.xlabel(xLabel)
plt.ylabel(yLabel)
plt.grid(linestyle="--", linewidth=0.5)

# Loop-local names are used so the notebook-level `x` and `y` are not
# overwritten by this cell.
for px, py in zip(x3, y3):
    plt.plot((px, cen_x[2]), (py, cen_y[2]), c='green', alpha=.2)

# Draw the center last so it sits on top of the lines.
ax1.scatter(cen_x[2], cen_y[2], c='red', marker='^', s=100)
# Call show(); a bare `plt.show` does nothing.
plt.show()

# Exercises (2):

## 1.) Generate 2 scatter plots (1 per cell) with different shaped points using sepal length and width (feel free to copy and paste prior code).

## 2.) Generate a regression line for the total iris sample (one line for all three species).

## 3.) Find the centroid of another iris metric and generate a scatterplot with lines to the centroid for each species independently.

# Nilearn

So what about brain data? The ability to generate reproducible and customizable plots is awesome, but how do we go about doing this with brain data? Well, first of all, most neuroscience data can be converted into an easily plottable form, but that does not mean that tools to work directly with neuroimaging data don't exist. Nilearn is an example of one such tool.

In [None]:
from nilearn import plotting

# One record per marker: (coordinate triple, color, size, tag).
dmn_markers = [
    ((0, -52, 18), 'red', 25, 'PCC'),
    ((-46, -68, 32), 'cyan', 35, 'Parietal'),
    ((46, -68, 32), 'magenta', 45, 'Parietal'),
    ((1, 50, -5), 'orange', 55, 'MFC'),
]
# Split the records back into the four parallel lists passed to the plot call.
dmn_coords, dmn_colors, marker_size, dmn_tags = (
    list(column) for column in zip(*dmn_markers)
)

# Build the interactive marker view and open it in the web browser.
view = plotting.view_markers(dmn_coords, dmn_colors, marker_size, dmn_tags)
view.open_in_browser()

## You may notice the structure is similar to matplotlib. Similarly the elements are customizable, feel free to change any item in the lists above to change the figure.
More complex forms of graphing are also available.

In [None]:
# Imported under an alias so it does not replace the scikit-learn `datasets`
# module imported at the top of the notebook (re-running the iris cells would
# otherwise fail on `datasets.load_iris()`).
from nilearn import datasets as nilearn_datasets

# Harvard-Oxford cortical atlas: the label image and its region names.
dataset = nilearn_datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm')
atlas_filename = dataset.maps
labels = dataset.labels

print('Atlas ROIs are located in nifti image (4D) at: %s' %
      atlas_filename)  # 4D data

# One subject of brain development fmri data
data = nilearn_datasets.fetch_development_fmri(n_subjects=1)
fmri_filenames = data.func[0]

In [None]:
# Newer nilearn releases provide the maskers in `nilearn.maskers`; fall back
# to the older `nilearn.input_data` location when that module is missing.
try:
    from nilearn.maskers import NiftiLabelsMasker
except ImportError:
    from nilearn.input_data import NiftiLabelsMasker

# Masker that extracts one signal per atlas label, with results cached in
# the 'nilearn_cache' directory.
masker = NiftiLabelsMasker(labels_img=atlas_filename, standardize=True,
                           memory='nilearn_cache', verbose=5)

# Here we go from nifti files to the signal time series in a numpy
# array. Note how we give confounds to be regressed out during signal
# extraction
time_series = masker.fit_transform(fmri_filenames, confounds=data.confounds)

In [None]:
import numpy as np
from nilearn import plotting
from nilearn.connectome import ConnectivityMeasure

# Correlation matrix of the extracted time series. fit_transform takes a
# list of subjects, so the single subject is wrapped and then unwrapped.
correlation_measure = ConnectivityMeasure(kind='correlation')
correlation_matrix = correlation_measure.fit_transform([time_series])[0]

# Mask the main diagonal for visualization.
np.fill_diagonal(correlation_matrix, 0)

# Plot the lower triangle on a large figure. The labels start with the
# background (0), hence the first label is skipped; the matrix is reordered
# for a block-like representation.
plotting.plot_matrix(
    correlation_matrix,
    figure=(10, 8),
    labels=labels[1:],
    tri='lower',
    vmax=0.8,
    vmin=-0.8,
    reorder=True,
)