In [None]:
import csv
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [None]:
def SeparateColumns(data):
    """Read a semicolon-delimited CSV file into a column-oriented dict.

    Parameters
    ----------
    data : str or path-like
        Path of the CSV file. The first row is used as the header row.

    Returns
    -------
    dict
        Maps each header name to the list of raw string values read from that
        column, in file order. The dict is empty when the file is empty or
        contains no data rows.

    Raises
    ------
    IndexError
        If a non-blank data row has fewer fields than the header row.
    """
    columns = defaultdict(list)
    # newline='' is what the csv module expects: it lets the reader handle line
    # endings itself so quoted fields containing CR/LF are returned unchanged.
    with open(data, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        headers = next(reader, None)
        if headers is None:
            # Completely empty file: there is no header row to split on.
            return {}
        column_nums = range(len(headers))

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing empty
                # line); they carry no data and would otherwise raise IndexError.
                continue
            for i in column_nums:
                columns[headers[i]].append(row[i])
    return dict(columns)

In [None]:
def cleanData(data):
    """Normalise the column dtypes of ``data`` in place.

    Every column is cast to the pandas ``string`` dtype and then to ``float``;
    a column whose values cannot be converted to float is left as strings
    (``errors='ignore'``). Afterwards the ``Gender``, ``Dyslexia``,
    ``Nativelang`` and ``Otherlang`` columns are replaced by numeric codes
    through ``Series.map``.

    The frame is mutated; nothing is returned.
    """
    yes_no_codes = {'No': 0, 'Yes': 1}
    code_tables = {
        'Gender': {'Male': 1, 'Female': 2},
        'Dyslexia': yes_no_codes,
        'Nativelang': yes_no_codes,
        'Otherlang': yes_no_codes,
    }

    # string first, then a best-effort float cast per column
    for name in list(data.columns):
        as_text = data[name].astype('string')
        data[name] = as_text.astype('float', errors='ignore')

    # categorical answers -> numeric codes (attribute-style column access)
    for name, codes in code_tables.items():
        data[name] = getattr(data, name).map(codes)

In [None]:
#Keep only the demographic columns plus the metrics of a subset of questions
def process_desktop_data(desktop_data):
    """Return the demographic columns and the metrics of selected questions.

    The selected questions are 1-12, 14-17, 22, 23 and 30 (1-based). For each
    of them the ``Clicks``, ``Hits``, ``Misses``, ``Accuracy`` and ``Missrate``
    columns are kept; ``Score`` columns are not selected.

    Parameters
    ----------
    desktop_data : pandas.DataFrame
        Frame containing all of the columns named above.

    Returns
    -------
    pandas.DataFrame
        ``desktop_data.loc[:, <selected columns>]``, demographic columns first,
        then the per-question metrics in question order.
    """
    demographic_columns = ['Gender', 'Nativelang', 'Otherlang', 'Age', 'Dyslexia']
    kept_questions = [*range(1, 13), *range(14, 18), 22, 23, 30]
    metric_prefixes = ('Clicks', 'Hits', 'Misses', 'Accuracy', 'Missrate')

    selected = demographic_columns + [
        prefix + str(question)
        for question in kept_questions
        for prefix in metric_prefixes
    ]
    return desktop_data.loc[:, selected]

In [None]:
def removeScoreColumns(data, num_questions=32):
    """Delete the ``Score1`` .. ``Score<num_questions>`` columns from ``data``.

    Columns that are not present are skipped, so the function can be applied
    to a frame whose score columns were already removed (or that has fewer
    questions) without raising ``KeyError``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to strip; it is modified in place.
    num_questions : int, optional
        Highest question number whose score column is removed (default 32).

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, returned for convenience.
    """
    for question in range(1, num_questions + 1):
        column = 'Score' + str(question)
        if column in data.columns:
            del data[column]
    return data

In [None]:
# Read the dataset, clean it, sort it by age and drop the Score columns.
# SeparateColumns returns a dict of column name -> list of raw string values.
columns = SeparateColumns("Dyt-desktop.csv")
desktopData = pd.DataFrame.from_dict(columns)

# cleanData mutates desktopData in place (it has no return value).
cleanData(desktopData)

# Rows are ordered by ascending Age; the original index labels are kept.
desktopData = desktopData.sort_values(by = ["Age"], ascending=True)

# Removes Score1..Score32 so they are not part of the frame used below.
desktopData = removeScoreColumns(desktopData)

# desktopData = process_desktop_data(desktopData)
desktopData.head()

Unnamed: 0,Gender,Nativelang,Otherlang,Age,Clicks1,Hits1,Misses1,Accuracy1,Missrate1,Clicks2,...,Hits31,Misses31,Accuracy31,Missrate31,Clicks32,Hits32,Misses32,Accuracy32,Missrate32,Dyslexia
0,1,0,1,7.0,10.0,10.0,0.0,1.0,0.0,5.0,...,0.0,0.0,0.0,0.0,17.0,2.0,0.0,0.117647,0.0,0
1499,1,0,1,7.0,1.0,1.0,0.0,1.0,0.0,3.0,...,1.0,1.0,0.030303,0.030303,22.0,2.0,0.0,0.090909,0.0,0
334,2,0,1,7.0,5.0,1.0,1.0,0.2,0.2,9.0,...,3.0,0.0,0.090909,0.0,16.0,1.0,2.0,0.0625,125.0,0
1770,2,1,1,7.0,2.0,2.0,0.0,1.0,0.0,2.0,...,0.0,0.0,0.0,0.0,21.0,1.0,1.0,0.047619,0.047619,1
1505,2,0,1,7.0,5.0,5.0,0.0,1.0,0.0,4.0,...,0.0,1.0,0.0,0.041667,20.0,1.0,0.0,0.05,0.0,0


In [None]:
# Train a random forest on a 67/33 split and report the hold-out accuracy.
# Target is the Dyslexia column; every other column is used as a feature.
y = desktopData['Dyslexia']
X = desktopData.loc[:, desktopData.columns != 'Dyslexia']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# The split is seeded, but the forest itself is also randomised (bootstrap
# samples / feature sub-sampling). Seed it too so the reported accuracy and
# every later prediction are reproducible on Restart & Run All.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train , y_train)
y_pred = rfc.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)

# NOTE(review): accuracy alone can be misleading if the Dyslexia classes are
# imbalanced - confirm the class ratio and consider reporting recall as well.
print("Accuracy:", accuracy)

Accuracy: 0.8952618453865336


In [None]:
# Select the rows whose Age lies inside an inclusive [start_age, end_age] range
def filter_data_by_age(data, start_age, end_age):
    """Return a copy of the rows of ``data`` with ``start_age <= Age <= end_age``.

    Both bounds are inclusive. The result is an independent copy, so modifying
    it does not affect ``data``.
    """
    ages = data['Age']
    not_too_young = ages >= start_age
    not_too_old = ages <= end_age
    return data[not_too_young & not_too_old].copy()

In [None]:
# Average of the per-question Accuracy means over a range of questions
def get_average(start, end, data):
    """Return the mean of the column means of ``Accuracy<start>`` .. ``Accuracy<end>``.

    Each column in the inclusive question range is averaged on its own, and the
    resulting per-column means are averaged again. The value is also printed.

    Raises ``ZeroDivisionError`` when ``start > end`` (no columns to average).
    """
    running_total = 0.0
    column_count = 0
    for question in range(start, end + 1):
        column = "Accuracy" + str(question)
        running_total += data[[column]].mean()[column]
        column_count += 1

    mean_accuracy = running_total / column_count
    print(f"Accuracy Average of Q{start!s} to Q{end!s} {mean_accuracy!s}")
    return mean_accuracy

In [None]:
def predicate_dyslexia(user_data):
    """Predict dyslexia for the first row of ``user_data`` with the global ``rfc``.

    All columns except ``Dyslexia`` are passed to ``rfc.predict``. Only the
    first prediction is inspected: a message is printed and 1 is returned when
    it equals 1, otherwise 0 is returned.
    """
    # Everything but the label column is a feature
    feature_mask = user_data.columns != 'Dyslexia'
    features = user_data.loc[:, feature_mask]

    # rfc is the classifier fitted in an earlier cell
    first_prediction = rfc.predict(features)[0]

    if first_prediction != 1:
        print("User1 does not have dyslexia.")
        return 0

    print("User1 has dyslexia.")
    return 1

In [None]:
# Pick one row to act as the "user". The draw is seeded so the prediction and
# the report produced by the cells below are reproducible across runs.
# NOTE(review): the row is drawn from the full frame, so it may be one of the
# rows the classifier was trained on.
user = desktopData.sample(n=1, random_state=42)

In [None]:
# Run the trained model on the sampled user; predicate_dyslexia prints the
# verdict and returns 1 (dyslexia) or 0 (no dyslexia).
predicate_result = predicate_dyslexia(user)
print(predicate_result)

In [None]:
# user = desktopData.sample(n=1)
# predicate_result = predicate_dyslexia(user)
# print(predicate_result)

# i = 0
# while i == 0:
#     user = desktopData.sample(n=1)
#     predicate_result = predicate_dyslexia(user)
#     if(predicate_result == 1):
#         print(predicate_result)
#         i = 1

# user.head()
# predicate_result = predicate_dyslexia(user)
# print(predicate_result)

In [None]:
# Get the rows whose Age equals the sampled user's age: the same value is
# passed as both the lower and the upper (inclusive) bound.
data_filterd_for_user = filter_data_by_age(desktopData, user["Age"].values[0], user["Age"].values[0])

In [None]:
# Find the question on which the user has the lowest accuracy.
# Questions are numbered 1..32 (columns Accuracy1..Accuracy32), so the scan has
# to run through 32 inclusive. On ties the earliest question is kept because
# the comparison is strict.
least_accuracy = user["Accuracy1"].values[0]
question_number = 1
for i in range(2, 33):
    if(user["Accuracy" + str(i)].values[0] < least_accuracy):
        least_accuracy = user["Accuracy" + str(i)].values[0]
        question_number = i
# Only the last expression of a cell is displayed, so show both values together.
least_accuracy, question_number

In [None]:
# Make sure every Accuracy column present in the age-filtered frame is float
for i in range(1, 33):
    column_name = f'Accuracy{i}'
    if column_name not in data_filterd_for_user.columns:
        continue
    data_filterd_for_user[column_name] = data_filterd_for_user[column_name].astype(float)

In [None]:
#get the category of this question and report whether the user is below average
def getReport(question_number, user_accuracy, data=None):
    """Print a report for the category that ``question_number`` belongs to.

    The questions are grouped into 13 consecutive categories. For the category
    containing ``question_number`` the average accuracy over that category's
    questions is computed with ``get_average`` (which prints its own line).
    If ``user_accuracy`` is strictly below that average, three lines are
    printed: the category with its description, the category average and the
    user's accuracy.

    Parameters
    ----------
    question_number : int
        1-based question number. Numbers outside every category produce no
        output.
    user_accuracy : float
        The user's accuracy on that question.
    data : pandas.DataFrame, optional
        Frame holding the ``Accuracy<N>`` columns used for the averages.
        Defaults to the notebook-level ``data_filterd_for_user``.

    Returns
    -------
    None
    """
    # (first question, last question, description) for categories 1..13.
    # NOTE(review): the runtime strings are kept exactly as they were,
    # including the "proplem" spelling and the "anama" fragment - confirm the
    # intended wording before changing them.
    categories = [
        (1, 4, "Which is reading acquisition: Alphabetic Awareness, Phonological Awareness and Visual discrimination and categorization."),
        (5, 9, "Which is Phonological Awareness, Syllabic Awareness and Auditory Discrimination and Categorization"),
        (10, 13, "Which is Lexical Awareness, Auditory Working Memory, and Auditory Discrimination and Categorization."),
        (14, 17, "Which is Visual Discrimination and Categorization, and Executive Functions"),
        (18, 21, "Which is Visual Working Memory, Sequential Auditory Working Memory, and Auditory Discrimination and Categorization."),
        (22, 23, "Which is Lexical, Phonological, and Orthographic Awareness"),
        (24, 24, "Which is Morphological and Semantic Awareness."),
        (25, 25, "Which is Syntactic Awareness"),
        (26, 26, "Which is Phonological, Lexical and, Orthographic Awareness"),
        (27, 28, "Which is Phonological, Lexical and Orthographic Awareness"),
        (29, 29, "Which is Phonological, Lexical and Orthographic Awareness"),
        (30, 30, "Which is Sequential Visual Working Memory "),
        (31, 32, "Which is Lexical, Orthographic Awareness and Auditory Working Memory, anama Sequential Auditory Working Memory and Phonological Awareness."),
    ]

    if data is None:
        # Backward-compatible default: the age-filtered frame built by an
        # earlier cell.
        data = data_filterd_for_user

    for category_number, (first, last, description) in enumerate(categories, start=1):
        if question_number not in range(first, last + 1):
            continue
        average = get_average(first, last, data)
        if(user_accuracy < average):
            print("User has proplem in Category " + str(category_number) + " " + description)
            print("Accuracy of this category is " + str(average))
            print("User Accuracy is " + str(user_accuracy))
        return

In [None]:
# Report on the category of the user's weakest question: getReport compares
# least_accuracy with the category average and prints only when it is lower.
getReport(question_number, least_accuracy)