In [2]:
import pandas
import plotly.express as px
from scipy.io.arff import loadarff

file_path = "./datasets/Autism-Adult-Data.arff"

raw_data = loadarff(file_path)
df = pandas.DataFrame(raw_data[0])

df.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,...,gender,ethnicity,jundice,austim,contry_of_res,used_app_before,result,age_desc,relation,Class/ASD
0,b'1',b'1',b'1',b'1',b'0',b'0',b'1',b'1',b'0',b'0',...,b'f',b'White-European',b'no',b'no',b'United States',b'no',6.0,b'18 and more',b'Self',b'NO'
1,b'1',b'1',b'0',b'1',b'0',b'0',b'0',b'1',b'0',b'1',...,b'm',b'Latino',b'no',b'yes',b'Brazil',b'no',5.0,b'18 and more',b'Self',b'NO'
2,b'1',b'1',b'0',b'1',b'1',b'0',b'1',b'1',b'1',b'1',...,b'm',b'Latino',b'yes',b'yes',b'Spain',b'no',8.0,b'18 and more',b'Parent',b'YES'
3,b'1',b'1',b'0',b'1',b'0',b'0',b'1',b'1',b'0',b'1',...,b'f',b'White-European',b'no',b'yes',b'United States',b'no',6.0,b'18 and more',b'Self',b'NO'
4,b'1',b'0',b'0',b'0',b'0',b'0',b'0',b'1',b'0',b'0',...,b'f',b'?',b'no',b'no',b'Egypt',b'no',2.0,b'18 and more',b'?',b'NO'


In [3]:
df.dtypes

A1_Score            object
A2_Score            object
A3_Score            object
A4_Score            object
A5_Score            object
A6_Score            object
A7_Score            object
A8_Score            object
A9_Score            object
A10_Score           object
age                float64
gender              object
ethnicity           object
jundice             object
austim              object
contry_of_res       object
used_app_before     object
result             float64
age_desc            object
relation            object
Class/ASD           object
dtype: object

In [4]:
# Values for ethnicity, country of residence, age_desc and relation
print("Range of ethnicity:")
print(df["ethnicity"].unique())
print("")

print("Range of contry_of_res:")
print(df["contry_of_res"].unique())
print("")

print("Range of age_desc:")
print(df["age_desc"].unique())
print("")

print("Range of relation:")
print(df["relation"].unique())
print("")

Range of ethnicity:
[b'White-European' b'Latino' b'?' b'Others' b'Black' b'Asian'
 b'Middle Eastern ' b'Pasifika' b'South Asian' b'Hispanic' b'Turkish'
 b'others']

Range of contry_of_res:
[b'United States' b'Brazil' b'Spain' b'Egypt' b'New Zealand' b'Bahamas'
 b'Burundi' b'Austria' b'Argentina' b'Jordan' b'Ireland'
 b'United Arab Emirates' b'Afghanistan' b'Lebanon' b'United Kingdom'
 b'South Africa' b'Italy' b'Pakistan' b'Bangladesh' b'Chile' b'France'
 b'China' b'Australia' b'Canada' b'Saudi Arabia' b'Netherlands' b'Romania'
 b'Sweden' b'Tonga' b'Oman' b'India' b'Philippines' b'Sri Lanka'
 b'Sierra Leone' b'Ethiopia' b'Viet Nam' b'Iran' b'Costa Rica' b'Germany'
 b'Mexico' b'Russia' b'Armenia' b'Iceland' b'Nicaragua' b'Hong Kong'
 b'Japan' b'Ukraine' b'Kazakhstan' b'AmericanSamoa' b'Uruguay' b'Serbia'
 b'Portugal' b'Malaysia' b'Ecuador' b'Niger' b'Belgium' b'Bolivia'
 b'Aruba' b'Finland' b'Turkey' b'Nepal' b'Indonesia' b'Angola'
 b'Azerbaijan' b'Iraq' b'Czech Republic' b'Cyprus']

Ran

In [5]:
df = df.replace(b'1', 1)
df = df.replace(b'0', 0)
df = df.replace(b'm', "male")
df = df.replace(b'f', "female")
df = df.replace(b'no', 0)
df = df.replace(b'yes', 1)
df = df.replace(b'NO', 0)
df = df.replace(b'YES', 1)

df = df.replace(b'Self', "Self")
df = df.replace(b'Parent', "Parent")
df = df.replace(b'?', "?")
df = df.replace(b'Health care professional', "HealtCareProfessional")
df = df.replace(b'Relative', "Relative")
df = df.replace(b'Others', "Others")

df.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,...,gender,ethnicity,jundice,austim,contry_of_res,used_app_before,result,age_desc,relation,Class/ASD
0,1,1,1,1,0,0,1,1,0,0,...,female,b'White-European',0,0,b'United States',0,6.0,b'18 and more',Self,0
1,1,1,0,1,0,0,0,1,0,1,...,male,b'Latino',0,1,b'Brazil',0,5.0,b'18 and more',Self,0
2,1,1,0,1,1,0,1,1,1,1,...,male,b'Latino',1,1,b'Spain',0,8.0,b'18 and more',Parent,1
3,1,1,0,1,0,0,1,1,0,1,...,female,b'White-European',0,1,b'United States',0,6.0,b'18 and more',Self,0
4,1,0,0,0,0,0,0,1,0,0,...,female,?,0,0,b'Egypt',0,2.0,b'18 and more',?,0


In [6]:
autist_df = df[df["austim"] == 1]
non_autist_df = df[df["austim"] == 0]

In [26]:
data = autist_df.groupby("Class/ASD").size().to_frame("count").reset_index()
fig = px.pie(data, names=data["Class/ASD"], values=data["count"], title='Classification of Autists')
fig.show()

In [27]:
data = non_autist_df.groupby("Class/ASD").size().to_frame("count").reset_index()
fig = px.pie(data, names=data["Class/ASD"], values=data["count"], title='Classification of Non-Autists')
fig.show()