# <center>  AUTISM DETECTION MODEL CREATION</center>
### Importing the Dependencies

In [24]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

### Data Collection

In [25]:
# Read the dataset from data.csv (path is relative to the notebook's working directory) into a DataFrame.
autism_data=pd.read_csv('./data.csv')

### About the dataset

In [26]:
# (number of rows, number of columns) of the loaded DataFrame.
autism_data.shape

(1054, 19)

In [27]:
# Summarize column names, non-null counts and dtypes.
autism_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1054 entries, 0 to 1053
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Case_No                 1054 non-null   int64 
 1   A1                      1054 non-null   int64 
 2   A2                      1054 non-null   int64 
 3   A3                      1054 non-null   int64 
 4   A4                      1054 non-null   int64 
 5   A5                      1054 non-null   int64 
 6   A6                      1054 non-null   int64 
 7   A7                      1054 non-null   int64 
 8   A8                      1054 non-null   int64 
 9   A9                      1054 non-null   int64 
 10  A10                     1054 non-null   int64 
 11  Age_Mons                1054 non-null   int64 
 12  Qchat-10-Score          1054 non-null   int64 
 13  Sex                     1054 non-null   object
 14  Ethnicity               1054 non-null   object
 15  Jaun

In [28]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
autism_data.describe()

Unnamed: 0,Case_No,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,Age_Mons,Qchat-10-Score
count,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0,1054.0
mean,527.5,0.563567,0.448767,0.401328,0.512334,0.524668,0.57685,0.649905,0.459203,0.489564,0.586338,27.867173,5.212524
std,304.407895,0.496178,0.497604,0.4904,0.500085,0.499628,0.494293,0.477226,0.498569,0.500128,0.492723,7.980354,2.907304
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0
25%,264.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,3.0
50%,527.5,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,30.0,5.0
75%,790.75,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,36.0,8.0
max,1054.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,36.0,10.0


In [29]:
# Class balance of the target column. Note: the column name ends with a trailing space.
autism_data['Class/ASD Traits '].value_counts()

Class/ASD Traits 
Yes    728
No     326
Name: count, dtype: int64

Yes --> Autistic Person (encoded as 1 after label encoding)<br>
No --> Non-Autistic Person (encoded as 0 after label encoding)

### Feature Encoding

In [30]:
# Encode each categorical column as integers.
# A separate LabelEncoder is fitted per column and kept in `encoders`, so the
# label <-> code mapping of every column stays available afterwards
# (encoders[col].classes_, encoders[col].inverse_transform) and can be reused
# to encode new inputs at prediction time. A single shared encoder would only
# remember the mapping of the last column it was fitted on.
columns = ['Ethnicity', 'Family_mem_with_ASD', 'Class/ASD Traits ', 'Sex', 'Jaundice']
encoders = {}
for col in columns:
    le = LabelEncoder()
    autism_data[col] = le.fit_transform(autism_data[col])
    encoders[col] = le
autism_data.dtypes

Case_No                    int64
A1                         int64
A2                         int64
A3                         int64
A4                         int64
A5                         int64
A6                         int64
A7                         int64
A8                         int64
A9                         int64
A10                        int64
Age_Mons                   int64
Qchat-10-Score             int64
Sex                        int32
Ethnicity                  int32
Jaundice                   int32
Family_mem_with_ASD        int32
Who completed the test    object
Class/ASD Traits           int32
dtype: object

### Splitting Features and target

In [31]:
# Separate the model inputs from the label.
# Identifier, respondent, age and aggregate-score columns are excluded, which
# leaves A1-A10, Sex, Ethnicity, Jaundice and Family_mem_with_ASD as features.
target_column = 'Class/ASD Traits '
excluded_columns = ['Case_No', 'Who completed the test', 'Age_Mons', 'Qchat-10-Score', target_column]

X = autism_data.drop(columns=excluded_columns)
Y = autism_data[target_column]

In [32]:
# Preview the feature matrix (pandas truncates the printed rows).
print(X)

      A1  A2  A3  A4  A5  A6  A7  A8  A9  A10  Sex  Ethnicity  Jaundice  \
0      0   0   0   0   0   0   1   1   0    1    0          8         1   
1      1   1   0   0   0   1   1   0   0    0    1          5         1   
2      1   0   0   0   0   0   1   1   0    1    1          8         1   
3      1   1   1   1   1   1   1   1   1    1    1          0         0   
4      1   1   0   1   1   1   1   1   1    1    0          5         0   
...   ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...        ...       ...   
1049   0   0   0   0   0   0   0   0   0    1    0          5         0   
1050   0   0   1   1   1   0   1   0   1    0    1          7         1   
1051   1   0   1   1   1   1   1   1   1    1    1          8         1   
1052   1   0   0   0   0   0   0   1   0    1    1          5         0   
1053   1   1   0   0   1   1   0   1   1    0    1          6         1   

      Family_mem_with_ASD  
0                       0  
1                       0  
2              

In [33]:
# Preview the integer-encoded target values.
print(Y)

0       0
1       1
2       1
3       1
4       1
       ..
1049    0
1050    1
1051    1
1052    0
1053    1
Name: Class/ASD Traits , Length: 1054, dtype: int32


### Splitting the Data into Training data & Test Data

In [34]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class ratio
# the same in both splits; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)
print(X.shape, X_train.shape, X_test.shape)

(1054, 14) (843, 14) (211, 14)


In [35]:
# Preview the training features (row labels are the original DataFrame indices).
print(X_train)

     A1  A2  A3  A4  A5  A6  A7  A8  A9  A10  Sex  Ethnicity  Jaundice  \
981   0   0   0   1   1   0   1   1   0    1    1         10         1   
492   1   0   0   1   1   0   1   0   1    1    1          5         1   
655   1   0   0   0   1   1   1   1   0    1    0          6         0   
336   0   0   0   1   0   0   0   1   0    0    1          6         0   
89    1   1   1   1   1   1   1   1   1    0    1          6         0   
..   ..  ..  ..  ..  ..  ..  ..  ..  ..  ...  ...        ...       ...   
97    1   0   0   0   1   0   1   1   1    1    1          6         0   
793   1   1   1   1   1   1   1   0   1    1    1          9         0   
101   0   0   0   0   0   1   1   1   0    1    0          8         0   
246   1   0   0   1   1   1   0   0   1    1    1          6         0   
281   1   1   1   1   1   1   1   1   1    0    1          5         0   

     Family_mem_with_ASD  
981                    0  
492                    0  
655                    0  
336

In [36]:
# Preview the training labels.
print(Y_train)

981    1
492    1
655    1
336    0
89     1
      ..
97     1
793    1
101    1
246    1
281    1
Name: Class/ASD Traits , Length: 843, dtype: int32


### Training Model using Logistic Regression

In [37]:
# Logistic-regression classifier; no arguments are passed, so scikit-learn's defaults apply.
model = LogisticRegression()

In [38]:
# Fit the classifier on the training split.
model.fit(X_train,Y_train)

### Accuracy on Training Data

In [39]:
# Score the model on the rows it was trained on.
# accuracy_score is documented as accuracy_score(y_true, y_pred), so the
# ground-truth labels are passed first and the predictions second.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training data : ', training_data_accuracy)

Accuracy on Training data :  1.0


### Accuracy on Testing Data

In [40]:
# Score the model on the held-out test split.
# accuracy_score is documented as accuracy_score(y_true, y_pred), so the
# ground-truth labels are passed first and the predictions second.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy on Test data : ', test_data_accuracy)

Accuracy on Test data :  1.0


### Test Cases
Test Case 1:

In [41]:
# One sample, given in the column order of X:
# A1..A10, Sex, Ethnicity, Jaundice, Family_mem_with_ASD
# (categorical values are the integer codes produced during feature encoding).
# NOTE(review): these values match row 981 of X_train as printed above, so this
# is a training row rather than an unseen case.
input_data = (0,0,0,1,1,0,1,1,0,1,1,10,1,0)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array as we are predicting for only one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so attach the same column names to the
# sample; predicting on a bare array makes scikit-learn warn about missing
# feature names.
input_features = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_features)
print(prediction)

# 0 is the encoded "No" class, anything else (1) is "Yes".
if prediction[0] == 0:
    print('The Person is not Autistic')
else:
    print('The Person is Autistic')

[1]
The Person is Autistic




Test Case 2:

In [42]:
# One sample, given in the column order of X:
# A1..A10, Sex, Ethnicity, Jaundice, Family_mem_with_ASD
# (categorical values are the integer codes produced during feature encoding).
# NOTE(review): the visible columns match row 336 of X_train as printed above;
# confirm whether this is meant to be an unseen case.
input_data = (0,0,0,1,0,0,0,1,0,0,1,6,0,0)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array as we are predicting for only one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so attach the same column names to the
# sample; predicting on a bare array makes scikit-learn warn about missing
# feature names.
input_features = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_features)
print(prediction)

# 0 is the encoded "No" class, anything else (1) is "Yes".
if prediction[0] == 0:
    print('The Person is not Autistic')
else:
    print('The Person is Autistic')

[0]
The Person is not Autistic




### Saving the Trained Model

In [43]:
import pickle

filename = 'autism_model.sav'

# Serialize the fitted model to disk. The context manager guarantees the file
# is flushed and closed even if pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Read the model back as a round-trip check.
# Caution: pickle.load can execute arbitrary code, so only load files that
# were produced by a trusted source (here, the file written just above).
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# <center> Model Successfully Created </center>