<a href="https://colab.research.google.com/github/alisakha/MachineLearningwithPython/blob/master/Master_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to every figure drawn below
sns.set(style="whitegrid")

# Read the study data set from the Excel workbook into a DataFrame
file_path = '/mnt/data/FINAL_STRONG_ADJUSTED_DATAset_with_CRT_significant.xlsx'
df = pd.read_excel(file_path)

# 1. Age: mean and standard deviation of the 'age' column
mean_age, std_age = df['age'].mean(), df['age'].std()

# 2. Gender: number of participants per distinct value in the 'gen' column
gender_distribution = df['gen'].value_counts()

# 3. Little's Law: keep only the four per-question columns LittSC1 .. LittSC4
littles_scores = df[[f'LittSC{question}' for question in range(1, 5)]]

# 4. CRT total score: mean and standard deviation of the 'CRTtot' column
crt_tot_mean, crt_tot_std = df['CRTtot'].mean(), df['CRTtot'].std()

# --- Data Visualization and Descriptive Statistics ---

# Plot gender distribution
# Tick labels are derived from the codes actually present in the data rather
# than hard-coded positions, so a missing or unexpected code can never be
# mislabelled. The 0/1 mapping follows the convention stated in the x-axis
# label below; any other code is shown verbatim.
gender_names = {0: 'Female', 1: 'Male'}
gender_counts = gender_distribution.sort_index()
gender_tick_labels = [gender_names.get(code, str(code)) for code in gender_counts.index]

plt.figure(figsize=(6, 4))
# `palette` without `hue` is deprecated in recent seaborn releases; assigning
# the x variable to `hue` keeps one pastel colour per bar. dodge=False keeps
# the bars full-width and centred on their ticks in older releases.
gender_ax = sns.barplot(
    x=gender_tick_labels,
    y=gender_counts.values,
    hue=gender_tick_labels,
    palette='pastel',
    dodge=False,
)
# Older seaborn versions add a legend that merely repeats the tick labels.
gender_legend = gender_ax.get_legend()
if gender_legend is not None:
    gender_legend.remove()
plt.title("Gender Distribution")
plt.xlabel("Gender (1 = Male, 0 = Female)")
plt.ylabel("Number of Participants")
plt.tight_layout()
plt.show()

# Plot distribution of correct answers in Little's Law questions
# Row-wise sum = one total per participant across the LittSC columns.
correct_per_participant = littles_scores.sum(axis=1)

# Totals are whole numbers, so use unit-wide bins centred on each integer
# (-0.5..0.5, 0.5..1.5, ...). A plain bin count would split the observed
# min-max range evenly, leaving bars off-centre and, when the observed range
# is narrow, producing spurious empty gaps.
# The scale runs from 0 to the number of questions (assumes each question is
# scored 0/1 - confirm); it is widened if the data contains a larger total so
# no participant is silently dropped from the plot.
highest_total = littles_scores.shape[1]
if not correct_per_participant.empty:
    highest_total = max(highest_total, int(correct_per_participant.max()))
littles_bin_edges = [total - 0.5 for total in range(highest_total + 2)]

# Draw on a dedicated figure: Series.plot otherwise reuses the current axes
# when a previous figure is still open (e.g. non-interactive backends).
_, littles_ax = plt.subplots()
correct_per_participant.plot(
    kind='hist',
    bins=littles_bin_edges,
    color='skyblue',
    edgecolor='black',
    ax=littles_ax,
)
littles_ax.set_xticks(list(range(highest_total + 1)))
plt.title("Distribution of Correct Answers in Little's Law Questions")
plt.xlabel("Number of Correct Answers")
plt.ylabel("Number of Participants")
plt.grid(True)
plt.tight_layout()
plt.show()

# Plot CRT scores distribution
crt_scores = df['CRTtot'].dropna()

# Use unit-wide bins centred on each integer between the observed extremes
# (assumes CRTtot holds whole-number scores - confirm). A plain bin count
# would split the observed range evenly and leave the bars off-centre from
# the score values they represent.
crt_bins = 4  # fallback used only when there is no data to align to
crt_ticks = None
if not crt_scores.empty:
    lowest_score = int(crt_scores.min())
    highest_score = int(crt_scores.max())
    crt_bins = [score - 0.5 for score in range(lowest_score, highest_score + 2)]
    crt_ticks = list(range(lowest_score, highest_score + 1))

# Draw on a dedicated figure: Series.plot otherwise reuses the current axes
# when a previous figure is still open (e.g. non-interactive backends).
_, crt_ax = plt.subplots()
crt_scores.plot(
    kind='hist',
    bins=crt_bins,
    color='lightgreen',
    edgecolor='black',
    ax=crt_ax,
)
if crt_ticks is not None:
    crt_ax.set_xticks(crt_ticks)
plt.title("Distribution of CRT Scores")
plt.xlabel("CRT Score")
plt.ylabel("Number of Participants")
plt.grid(True)
plt.tight_layout()
plt.show()

# --- Summary Data Preparation ---
def _flatten_summary(summary):
    """Turn a {section: scalar-or-dict} mapping into a tidy three-column table.

    Each nested dict entry becomes one row (section, key, value); each scalar
    becomes one row with an empty category. Building the frame this way avoids
    ``pd.DataFrame(summary)``, which would align unrelated keys (e.g. gender
    code 0 and CRT score 0) on the same index row and repeat every scalar on
    all rows.
    """
    rows = []
    for section, content in summary.items():
        if isinstance(content, dict):
            rows.extend((section, str(label), value) for label, value in content.items())
        else:
            rows.append((section, "", content))
    return pd.DataFrame(rows, columns=["Statistic", "Category", "Value"])


# Age statistics
age_stats = {
    "Mean Age": mean_age,
    "Standard Deviation of Age": std_age,
}

# Gender distribution: participant count per gender code
gender_stats = gender_distribution.to_dict()

# Column-wise sum of each Little's Law question column (one total per question)
littles_distribution = littles_scores.sum(axis=0).to_dict()

# CRT score distribution: participant count per distinct score, ascending
crt_distribution = df['CRTtot'].value_counts().sort_index().to_dict()

# Collect all summaries, keyed by section name
data_summary = {
    "Age Statistics": age_stats,
    "Gender Distribution": gender_stats,
    "Little's Law Score Distribution": littles_distribution,
    "CRT Score Distribution": crt_distribution,
    "Mean CRT Score": crt_tot_mean,
    "Standard Deviation CRT Score": crt_tot_std,
}

# One row per reported number, ready for display
summary_table = _flatten_summary(data_summary)

# Display the summary table.
# ace_tools is not part of the standard scientific stack and may be absent
# (e.g. on Colab); fall back to plain-text output instead of failing with
# an ImportError.
try:
    import ace_tools as tools
except ImportError:
    tools = None

summary_title = "Descriptive and Analytical Data Summary"
if tools is not None:
    tools.display_dataframe_to_user(name=summary_title, dataframe=summary_table)
else:
    print(summary_title)
    print(summary_table.to_string(index=False))
