# Understanding Autism in ML

In [74]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split as tts
import sklearn.metrics as met

## Data processing

In [117]:
df = pd.read_csv('data.csv')
df.drop(['Case_No'], axis=1, inplace=True)

Check for class imbalance if its there we have to chose a different metric/ or do class balancing

In [64]:
df['Class/ASD Traits '].value_counts()

1    728
0    326
Name: Class/ASD Traits , dtype: int64

In [118]:
df.dtypes

A1                         int64
A2                         int64
A3                         int64
A4                         int64
A5                         int64
A6                         int64
A7                         int64
A8                         int64
A9                         int64
A10                        int64
Age_Mons                   int64
Qchat-10-Score             int64
Sex                       object
Ethnicity                 object
Jaundice                  object
Family_mem_with_ASD       object
Who completed the test    object
Class/ASD Traits          object
dtype: object

In [119]:
df.isnull().sum()

A1                        0
A2                        0
A3                        0
A4                        0
A5                        0
A6                        0
A7                        0
A8                        0
A9                        0
A10                       0
Age_Mons                  0
Qchat-10-Score            0
Sex                       0
Ethnicity                 0
Jaundice                  0
Family_mem_with_ASD       0
Who completed the test    0
Class/ASD Traits          0
dtype: int64

Doing label and one-hot encoding of the string variables

In [120]:
def pre(d_f):
    """
    Function returns pre-processed data for training
    """
    
    lab = LabelEncoder()
    d_f['Sex'] = lab.fit_transform(d_f['Sex'])
    d_f['Ethnicity'] = lab.fit_transform(d_f['Ethnicity'])
    d_f['Jaundice'] = lab.fit_transform(d_f['Jaundice'])
    d_f['Family_mem_with_ASD'] = lab.fit_transform(d_f['Family_mem_with_ASD'])
    d_f['Who completed the test'] = lab.fit_transform(d_f['Who completed the test'])
    d_f['Class/ASD Traits '] = lab.fit_transform(d_f['Class/ASD Traits '])
    
    c_c = ['Ethnicity', 'Who completed the test']
    me = OneHotEncoder(sparse=False, handle_unknown='ignore')
    me.fit(d_f[c_c])
    df_new_ = pd.concat([d_f.drop(c_c, 1), pd.DataFrame(myEncoder.transform(d_f[c_c]))], axis=1).reindex()
    
    return df_new_

df_tr = pre(df)

In [122]:
df_tr_x = df_tr.drop(['Class/ASD Traits '],1)
df_tr_y = df_tr['Class/ASD Traits ']

## Logistic Regression

In [123]:
tr_x, te_x, tr_y, te_y = tts(df_tr_x, df_tr_y, train_size=.8, random_state=277)

In [124]:
log = LogisticRegression()
model = log.fit(tr_x,tr_y)

In [125]:
pred = model.predict(te_x)

In [126]:
met.confusion_matrix(te_y, pred)

array([[ 59,   4],
       [  0, 148]])

In [127]:
f_1 = met.f1_score(te_y, pred)
prec_ = met.f1_score(te_y, pred)

In [128]:
df.columns

Index(['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'Age_Mons',
       'Qchat-10-Score', 'Sex', 'Ethnicity', 'Jaundice', 'Family_mem_with_ASD',
       'Who completed the test', 'Class/ASD Traits '],
      dtype='object')

## Prediction of values

Getting the inputs

In [239]:
def get_input():
    """
    Returns processed form of input for prediction
    """
    in_ = input()
    in_ = in_.split(',')
    num_ = [float(x) for x in in_ if in_.index(x) < 12]
    in_ = num_ + in_[12:]
    return in_
inp_ = get_input()

1,1,1,1,1,1,1,0,1,1,28,9,m,middle eastern,no,no,family member


In [240]:
def pre_pd(d_f):
    """
    Function returns pre-processed data for training
    """
    
    lab = LabelEncoder()
    d_f['Sex'] = lab.fit_transform(d_f['Sex'])
    d_f['Ethnicity'] = lab.fit_transform(d_f['Ethnicity'])
    d_f['Jaundice'] = lab.fit_transform(d_f['Jaundice'])
    d_f['Family_mem_with_ASD'] = lab.fit_transform(d_f['Family_mem_with_ASD'])
    d_f['Who completed the test'] = lab.fit_transform(d_f['Who completed the test'])
    #d_f['Class/ASD Traits '] = lab.fit_transform(d_f['Class/ASD Traits '])
    
    c_c = ['Ethnicity', 'Who completed the test']
    me = OneHotEncoder(sparse=False, handle_unknown='ignore')
    me.fit(d_f[c_c])
    df_new_ = pd.concat([d_f.drop(c_c, 1), pd.DataFrame(myEncoder.transform(d_f[c_c]))], axis=1).reindex()
    
    return df_new_

In [243]:
def results(df_, iput_, model):
    """
    returns the result of the given input
    """
    columns_n = df_.columns[:-1]
    inp_df = pd.DataFrame([iput_])
    inp_df.columns = columns_n
    ch_c = pre_pd(inp_df)
    output = model.predict(ch_c)
    if output == 0:
        print('NO')
    else:
        print('YES')

In [242]:
results(df, inp_, model)

YES
