In [5]:
import pandas as pd
# Load the study-performance dataset; the relative path is resolved against
# the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="study_performance_Areeba.csv")

In [6]:
# Tag every row with the same student name and ID. The ID is a string
# literal, so its leading zeros are kept inside the DataFrame.
df = df.assign(student_name='Areeba Sajid', student_id='0013')

In [7]:
# Keep only the rows whose gender column equals 'female'.
df = df.loc[df['gender'].eq('female')]


In [8]:
# NOTE(review): this writes the filtered frame (including the added
# student_name / student_id columns) to the same path the data was read from,
# so the CSV on disk is replaced by the filtered version. index=False keeps
# the row index out of the file.
df.to_csv("study_performance_Areeba.csv", index=False)


In [9]:
# Preview the first ten rows under a heading.
print("First 10 rows of the dataset:", df.head(10), sep="\n")


First 10 rows of the dataset:
   gender race_ethnicity parental_level_of_education         lunch  \
0  female        group B           bachelor's degree      standard   
1  female        group C                some college      standard   
2  female        group B             master's degree      standard   
3  female        group B          associate's degree      standard   
4  female        group B                some college      standard   
5  female        group B                 high school  free/reduced   
6  female        group B                 high school      standard   
7  female        group A             master's degree      standard   
8  female        group C            some high school      standard   
9  female        group B            some high school  free/reduced   

  test_preparation_course  math_score  reading_score  writing_score  \
0                    none          72             72             74   
1               completed          69             90     

In [10]:
# Report the dtype of each column.
print(f"\nColumn Data Types:\n{df.dtypes}")


Column Data Types:
gender                         object
race_ethnicity                 object
parental_level_of_education    object
lunch                          object
test_preparation_course        object
math_score                      int64
reading_score                   int64
writing_score                   int64
student_name                   object
student_id                     object
dtype: object


In [11]:
# Count the distinct values in every object-dtype column.
print(
    "\nUnique values in categorical columns:",
    df.select_dtypes(include=['object']).nunique(),
    sep="\n",
)



Unique values in categorical columns:
gender                         1
race_ethnicity                 5
parental_level_of_education    6
lunch                          2
test_preparation_course        2
student_name                   1
student_id                     1
dtype: int64


In [12]:
# Discard every row that has a missing value in any column
# (df.fillna() is the alternative when gaps should be filled instead).
df = df.dropna(axis=0, how='any')


In [13]:
# Normalise the column labels: lower-case them and swap spaces for underscores.
df.columns = [label.lower().replace(' ', '_') for label in df.columns]


In [14]:
# Names of the three score columns that the summary cells operate on.
score_cols = [f"{subject}_score" for subject in ("math", "reading", "writing")]


In [15]:
# Column-wise mean of each score column.
print("\nAverage Scores:", df[score_cols].mean(), sep="\n")



Average Scores:
math_score       63.633205
reading_score    72.608108
writing_score    72.467181
dtype: float64


In [16]:
# Lowest value found in each score column.
print(f"\nMinimum Scores:\n{df[score_cols].min()}")


Minimum Scores:
math_score        0
reading_score    17
writing_score    10
dtype: int64


In [17]:
# Highest value found in each score column.
print("\nMaximum Scores:", df.loc[:, score_cols].max(), sep="\n")



Maximum Scores:
math_score       100
reading_score    100
writing_score    100
dtype: int64


In [18]:
# Sample standard deviation of each score column (ddof=1 is the pandas default).
print("\nStandard Deviation of Scores:", df[score_cols].std(ddof=1), sep="\n")



Standard Deviation of Scores:
math_score       15.491453
reading_score    14.378245
writing_score    14.844842
dtype: float64


In [19]:
# Select the rows where math, reading and writing are all strictly above 90.
high_scores = df[
    df[['math_score', 'reading_score', 'writing_score']].gt(90).all(axis=1)
]
print("\nStudents with >90 in all subjects:", high_scores, sep="\n")



Students with >90 in all subjects:
     gender race_ethnicity parental_level_of_education         lunch  \
57   female        group E           bachelor's degree      standard   
79   female        group C           bachelor's degree      standard   
90   female        group D            some high school      standard   
218  female        group E                some college      standard   
222  female        group E           bachelor's degree      standard   
271  female        group A            some high school      standard   
280  female        group E           bachelor's degree  free/reduced   
299  female        group C           bachelor's degree      standard   
351  female        group E             master's degree      standard   
367  female        group D                some college      standard   
370  female        group C          associate's degree      standard   
442  female        group B           bachelor's degree      standard   
455  female        group E  

In [20]:
# Order the rows from highest to lowest writing_score, then show the top five.
sorted_df = df.sort_values('writing_score', ascending=False)
print(
    "\nData sorted by writing_score (descending):",
    sorted_df.head(5),
    sep="\n",
)


Data sorted by writing_score (descending):
     gender race_ethnicity parental_level_of_education         lunch  \
490  female        group D             master's degree      standard   
494  female        group E          associate's degree      standard   
466  female        group D           bachelor's degree  free/reduced   
52   female        group D             master's degree      standard   
57   female        group E           bachelor's degree      standard   

    test_preparation_course  math_score  reading_score  writing_score  \
490                    none          92            100            100   
494                    none         100            100            100   
466               completed          93            100            100   
52                     none          87            100            100   
57                completed          99            100            100   

     student_name student_id  
490  Areeba Sajid       0013  
494  Areeba Sajid     

In [21]:
# Mean of each score column for every distinct gender value.
print(
    "\nAverage Scores by Gender:",
    df.groupby('gender')[score_cols].mean(),
    sep="\n",
)


Average Scores by Gender:
        math_score  reading_score  writing_score
gender                                          
female   63.633205      72.608108      72.467181


In [22]:
# Number of rows in each test_preparation_course group.
print(
    "\nStudent count by Test Preparation Course:",
    df.groupby('test_preparation_course').size(),
    sep="\n",
)


Student count by Test Preparation Course:
test_preparation_course
completed    184
none         334
dtype: int64


In [23]:
# Per-row mean across the score columns, stored as average_score.
df = df.assign(average_score=df[score_cols].mean(axis='columns'))

In [24]:
# Translate an average score into a performance label
def performance_level(score):
    """Return the performance band that ``score`` falls into.

    Lower bounds are inclusive: >= 90 is 'Excellent', >= 70 is 'Good',
    >= 50 is 'Average', and anything else is 'Poor'.
    """
    # Bands are listed from highest cutoff to lowest; the first match wins.
    bands = ((90, 'Excellent'), (70, 'Good'), (50, 'Average'))
    for cutoff, label in bands:
        if score >= cutoff:
            return label
    return 'Poor'

In [25]:
# Label each row by passing its average_score through performance_level.
df['performance_level'] = df['average_score'].map(performance_level)

In [26]:
# Write the final frame to its own CSV file, leaving the row index out.
df.to_csv(path_or_buf="final_study_performance_Areeba.csv", index=False)