# Some machine learning examples

## Load libraries

In [4]:
import numpy as np
from nilearn import datasets
from nilearn import input_data
from nilearn import plotting

## Load datasets

In [5]:
# Load fMRI data
dataset = datasets.fetch_development_fmri(n_subjects=1)
func_filename = dataset.func[0]
confounds_filename = dataset.confounds[0]

# Load atlas data
power = datasets.fetch_coords_power_2011()

## Extract signals from parcellation

In [6]:
# Extract coordinates
coords = np.vstack((power.rois['x'], power.rois['y'], power.rois['z'])).T

# Create masker object (spheres with 5 radius)
spheres_masker = input_data.NiftiSpheresMasker(
    seeds=coords, radius=5, detrend=True, standardize=True)


# Extract timeseries from ROIs
timeseries = spheres_masker.fit_transform(func_filename)

# Plot shape of timeseries
timeseries.shape

(168, 264)

## Run k-means clustering on timeseries

The `KMeans` algorithm clusters data by trying to separate samples in $n$ groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares. This algorithm requires the number of clusters to be specified. It scales well to large number of samples and has been used across a large range of application areas in many different fields ([more](https://scikit-learn.org/stable/modules/clustering.html#k-means)).


In [7]:
from sklearn.cluster import KMeans

# Creare KMeans object and specify n_cluster as 5
clustering = KMeans(n_clusters=5)

# Cluster timeseries using .fit method
clustering.fit(timeseries)

KMeans(n_clusters=5)

In [10]:
# Print clustering labels & shape
clustering.labels_

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, 4, 2,
       4, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 4, 2, 2, 4, 4, 4, 2, 2, 2, 2, 4,
       4, 4, 2, 2, 0, 2, 4, 4, 4, 2, 0, 2, 0, 2, 0, 2, 0, 4, 4, 4, 4, 2,
       1, 1, 1, 2, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 4, 4, 2, 2,
       2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 2, 0, 0, 0, 0, 0,
       4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 3, 3, 0, 2, 2, 2, 3, 2, 2, 2,
       3, 2, 2, 2, 0, 0, 0, 2, 2, 1, 1, 1, 3, 3], dtype=int32)

In [11]:
# Cluster brain regions
clustering.fit(timeseries.T)

KMeans(n_clusters=5)

In [12]:
# Print clustering labels & shape
clustering.labels_

array([2, 2, 3, 0, 2, 3, 3, 1, 2, 2, 0, 4, 1, 3, 4, 4, 4, 2, 1, 2, 1, 4,
       4, 4, 3, 1, 4, 4, 4, 1, 0, 1, 1, 0, 0, 4, 2, 0, 4, 0, 4, 3, 4, 4,
       3, 3, 4, 3, 2, 4, 4, 3, 2, 3, 3, 3, 2, 3, 3, 3, 0, 3, 4, 2, 3, 2,
       3, 3, 3, 4, 3, 1, 0, 1, 1, 0, 4, 2, 0, 0, 2, 2, 0, 0, 3, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 2, 2, 0, 3, 3,
       0, 0, 3, 0, 0, 0, 4, 0, 1, 2, 4, 3, 2, 4, 1, 2, 3, 2, 0, 0, 0, 2,
       0, 0, 0, 0, 0, 2, 2, 1, 2, 2, 4, 1, 4, 4, 1, 4, 1, 1, 4, 4, 1, 1,
       1, 3, 1, 1, 3, 4, 1, 1, 4, 1, 2, 4, 4, 1, 1, 4, 1, 1, 1, 0, 3, 3,
       0, 0, 1, 3, 3, 2, 2, 3, 3, 1, 1, 3, 3, 4, 1, 0, 4, 0, 0, 0, 3, 0,
       1, 3, 0, 0, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3, 0,
       0, 2, 4, 1, 4, 2, 2, 2, 4, 3, 3, 4, 4, 3, 1, 1, 3, 1, 1, 3, 3, 2,
       2, 2, 4, 2, 1, 2, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4],
      dtype=int32)

## Run PCA on timeseries

PCA - linear dimensionality reduction using Singular Value Decomposition of the data to project it to a lower dimensional space.

In [23]:
from sklearn.decomposition import PCA 

decomposition = PCA(n_components=10)
decomposition.fit(timeseries)

# Print shape of components

# Print variance eplained by each component

PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)