In [72]:
from pprint import pprint

In [73]:
import warnings
warnings.filterwarnings('ignore')

In [74]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report

In [196]:
df = pd.read_csv('datasets_melanoma.csv')
describe_features = {}
for img in set(df.img.values):
    df_img = df[df.img==img][
        ['energy','entropy', 'kurtosis', 
         'max','mean', 'min', 'moment_3',
         'moment_4','skew', 'std', 'sum', 'var', 'color']
    ]
    describe_features[img] = []
    describe_features[img].extend(df_img[df_img.color == 'red'].describe().ix[['mean', 'std']].values.ravel())
    describe_features[img].extend(df_img[df_img.color == 'green'].describe().ix[['mean', 'std']].values.ravel())
    describe_features[img].extend(df_img[df_img.color == 'blue'].describe().ix[['mean', 'std']].values.ravel())
    # describe_features[img].extend(df_img[df_img.color == 'gray'].describe().ix[['mean', 'std']].values.ravel())

In [197]:
df1 = pd.read_csv('datasets_naevus.csv')
describe_features1 = {}
for img1 in set(df1.img.values):
    df_img1 = df1[df1.img==img1][
        ['energy','entropy', 'kurtosis', 
         'max','mean', 'min', 'moment_3',
         'moment_4','skew', 'std', 'sum', 'var', 'color']
    ]
    
    describe_features1[img1] = []
    describe_features1[img1].extend(df_img1[df_img1.color == 'red'].describe().ix[['mean', 'std']].values.ravel())
    describe_features1[img1].extend(df_img1[df_img1.color == 'green'].describe().ix[['mean', 'std']].values.ravel())
    describe_features1[img1].extend(df_img1[df_img1.color == 'blue'].describe().ix[['mean', 'std']].values.ravel())
    # describe_features1[img1].extend(df_img1[df_img1.color == 'gray'].describe().ix[['mean', 'std']].values.ravel())

In [198]:
X = [describe_features[img] for img in set(df.img.values)]
y = [1. for _ in X]
leny = len(y)
X.extend([describe_features1[img] for img in set(df1.img.values)])
y = [1. if i < leny else 0. for i,_ in enumerate(X)]

In [213]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)

## Classification

In [214]:
clf = Perceptron()
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
pprint(clf.get_params())

              precision    recall  f1-score   support

         0.0       0.60      1.00      0.75         9
         1.0       1.00      0.25      0.40         8

    accuracy                           0.65        17
   macro avg       0.80      0.62      0.57        17
weighted avg       0.79      0.65      0.59        17

{'alpha': 0.0001,
 'class_weight': None,
 'early_stopping': False,
 'eta0': 1.0,
 'fit_intercept': True,
 'max_iter': 1000,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': None,
 'random_state': 0,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}


In [215]:
clf = SGDClassifier()
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
pprint(clf.get_params())

              precision    recall  f1-score   support

         0.0       0.73      0.89      0.80         9
         1.0       0.83      0.62      0.71         8

    accuracy                           0.76        17
   macro avg       0.78      0.76      0.76        17
weighted avg       0.78      0.76      0.76        17

{'alpha': 0.0001,
 'average': False,
 'class_weight': None,
 'early_stopping': False,
 'epsilon': 0.1,
 'eta0': 0.0,
 'fit_intercept': True,
 'l1_ratio': 0.15,
 'learning_rate': 'optimal',
 'loss': 'hinge',
 'max_iter': 1000,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': 'l2',
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}


In [216]:
clf = svm.SVC(gamma='scale', decision_function_shape='ovo')
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
pprint(clf.get_params())

              precision    recall  f1-score   support

         0.0       0.86      0.67      0.75         9
         1.0       0.70      0.88      0.78         8

    accuracy                           0.76        17
   macro avg       0.78      0.77      0.76        17
weighted avg       0.78      0.76      0.76        17

{'C': 1.0,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovo',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}


In [217]:
clf = svm.LinearSVC(max_iter=10000, tol=0.001, multi_class='crammer_singer')
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
pprint(clf.get_params())

              precision    recall  f1-score   support

         0.0       0.71      0.56      0.63         9
         1.0       0.60      0.75      0.67         8

    accuracy                           0.65        17
   macro avg       0.66      0.65      0.65        17
weighted avg       0.66      0.65      0.64        17

{'C': 1.0,
 'class_weight': None,
 'dual': True,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'loss': 'squared_hinge',
 'max_iter': 10000,
 'multi_class': 'crammer_singer',
 'penalty': 'l2',
 'random_state': None,
 'tol': 0.001,
 'verbose': 0}


In [218]:
SGDC = SGDClassifier(
    shuffle=False,
)
parameters = {
    'loss': ('log', 'modified_huber', 'hinge'), 
    'tol': [0.00001, 0.0001, 0.001, 0.01, 0.1], 
    'penalty': ('elasticnet', 'l2', 'l1'),
    'validation_fraction': [0.60, 0.50, 0.40, 0.30, 0.20, 0.10],
    'max_iter' : [10000, 1000, 100],
    'average': [False, True],
}
clf = GridSearchCV(SGDC, parameters, cv=5)
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
pprint(clf.get_params())

              precision    recall  f1-score   support

         0.0       0.67      0.67      0.67         9
         1.0       0.62      0.62      0.62         8

    accuracy                           0.65        17
   macro avg       0.65      0.65      0.65        17
weighted avg       0.65      0.65      0.65        17

{'cv': 5,
 'error_score': 'raise-deprecating',
 'estimator': SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=False, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False),
 'estimator__alpha': 0.0001,
 'estimator__average': False,
 'estimator__class_weight': None,
 'estimator__early_stopping': False,
 'estimator__epsilon': 0.1,
 'estimator__eta0': 0.0,
 'e

## Regression

In [219]:
clf = LinearRegression()
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
pprint(clf.get_params())

-0.5930847402902162
0.3968930840861438
{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'normalize': False}
