In [3]:
from sklearn import datasets
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

from imblearn import over_sampling as os
from imblearn import combine,ensemble
from imblearn import under_sampling as us
from imblearn.metrics import classification_report_imbalanced

import numpy as np

In [4]:
# Seed shared by the dataset generator and the train/test split so that the
# class counts printed below are reproducible.
RANDOM_STATE = 42

# Generate a synthetic, imbalanced two-class dataset: 5000 samples with
# 20 features (10 informative, 1 redundant), class weights 0.1 / 0.9 and
# no label noise (flip_y=0).
X, y = datasets.make_classification(n_classes=2, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)

# Split the data into train and test sets (default split proportions).
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=RANDOM_STATE)

# Count how many training samples fall into each class.
unique, counts = np.unique(y_train, return_counts=True)

# Show one row per class: [label, count].
# print(...) with a single argument behaves identically on Python 2 and 3,
# whereas the bare `print expr` statement is a SyntaxError on Python 3.
print(np.asarray((unique, counts)).T)

[[   0  377]
 [   1 3373]]


In [5]:
# Catalogue of the imbalanced-learn samplers this notebook can try, keyed by
# class name.
# NOTE: `os` and `us` are the aliases for imblearn.over_sampling and
# imblearn.under_sampling from the imports cell -- `os` is NOT the
# standard-library `os` module here.
# Every entry is a zero-argument factory, so only the sampler that is actually
# selected below gets instantiated.
SAMPLER_FACTORIES = {
    # --- Over-sampling methods ---
    # SMOTE - Synthetic Minority Over-sampling Technique (and, per the
    # library description, the variants Borderline SMOTE 1, 2 and SVM-SMOTE).
    "SMOTE": lambda: os.SMOTE(random_state=RANDOM_STATE),
    # Adaptive Synthetic Sampling Approach for Imbalanced Learning.
    "ADASYN": lambda: os.ADASYN(random_state=RANDOM_STATE),
    # Random over-sampling.
    "RandomOverSampler": lambda: os.RandomOverSampler(random_state=RANDOM_STATE),

    # --- Under-sampling methods ---
    # Generates centroids based on clustering methods.
    "ClusterCentroids": lambda: us.ClusterCentroids(),
    # Condensed nearest neighbour method.
    "CondensedNearestNeighbour": lambda: us.CondensedNearestNeighbour(),
    # Edited nearest neighbour method.
    "EditedNearestNeighbours": lambda: us.EditedNearestNeighbours(),
    # Repeated edited nearest neighbour method.
    "RepeatedEditedNearestNeighbours": lambda: us.RepeatedEditedNearestNeighbours(),
    # AllKNN method.
    "AllKNN": lambda: us.AllKNN(),
    # Instance hardness threshold.
    "InstanceHardnessThreshold": lambda: us.InstanceHardnessThreshold(),
    # NearMiss methods (version 2 selected here).
    "NearMiss": lambda: us.NearMiss(version=2, random_state=RANDOM_STATE),
    # Neighbourhood cleaning rule.
    "NeighbourhoodCleaningRule": lambda: us.NeighbourhoodCleaningRule(),
    # One-sided selection method.
    "OneSidedSelection": lambda: us.OneSidedSelection(),
    # Under-samples the majority class(es) by randomly picking samples with
    # or without replacement.
    "RandomUnderSampler": lambda: us.RandomUnderSampler(),
    # Removes Tomek's links.
    "TomekLinks": lambda: us.TomekLinks(),

    # --- Combined over- and under-sampling ---
    # SMOTE followed by Edited Nearest Neighbours.
    "SMOTEENN": lambda: combine.SMOTEENN(),
    # SMOTE followed by Tomek links removal.
    "SMOTETomek": lambda: combine.SMOTETomek(),

    # --- Ensemble samplers ---
    # Iterative under-sampling.
    "BalanceCascade": lambda: ensemble.BalanceCascade(),
    # Iterative random under-sampling.
    "EasyEnsemble": lambda: ensemble.EasyEnsemble(),
}

# The sampler to use. Making the choice explicit replaces a chain of
# successive `smote = ...` assignments in which only the final one
# (EasyEnsemble) ever took effect. Change this key to try another sampler.
SAMPLER_NAME = "EasyEnsemble"

# The variable keeps the name `smote` because the resampling cell refers to
# it, even though it can hold any of the samplers above.
smote = SAMPLER_FACTORIES[SAMPLER_NAME]()

In [6]:
# Resample the training data with the sampler held in `smote`.
# imbalanced-learn renamed `fit_sample` to `fit_resample` in later releases,
# so prefer the new name and fall back to the old one when it is absent.
_resample = getattr(smote, "fit_resample", None) or smote.fit_sample
X_balanced, Y_balanced = _resample(X_train, y_train)

# Count samples per class after resampling.
# NOTE(review): the recorded output shows 3770 samples per class although the
# training set contains only 377 minority samples -- presumably the ensemble
# sampler returns several balanced subsets stacked together; confirm the shape
# of X_balanced before feeding it to a classifier.
unique, counts = np.unique(Y_balanced, return_counts=True)

# print(...) with a single argument works on both Python 2 and Python 3;
# the bare `print expr` statement is a SyntaxError on Python 3.
print(np.asarray((unique, counts)).T)

[[   0 3770]
 [   1 3770]]
