In [1]:
import pandas as pd

# Load the raw records. The code below reads the 'Decision', 'University',
# 'Degree', 'GPA' and 'GRE ...' columns, so the CSV must provide them.
data = pd.read_csv('psychology.csv')

# Only rows whose decision is exactly 'Accepted' take part in the analysis.
accepted_data = data[data['Decision'] == 'Accepted']

# Keep (University, Degree) groups with at least 10 accepted rows, then
# average each group's numeric columns.
# numeric_only=True is required: the frame still carries text columns such as
# 'Decision', and a bare .mean() on those emits a FutureWarning on pandas 1.5
# and raises TypeError on pandas >= 2.0.
group_keys = ['University', 'Degree']
grouped_data = (
    accepted_data.groupby(group_keys)
    .filter(lambda x: len(x) >= 10)
    .groupby(group_keys)
    .mean(numeric_only=True)
)

# Column names are spelled exactly as in the data file (including
# 'GRE Quantitive'); groups missing any of these averages are dropped.
score_columns = ['GPA', 'GRE Verbal', 'GRE Quantitive', 'GRE Writing', 'GRE Subject']
filtered_data = grouped_data[score_columns].dropna()

# Discard groups whose mean GPA is above 4.
# NOTE(review): presumably these come from GPAs reported on a non-4.0 scale;
# confirm whether the filter should instead be applied per row before averaging.
# .copy() makes this an independent frame so that adding columns to it later
# does not trigger pandas' SettingWithCopyWarning.
filtered_data = filtered_data[filtered_data['GPA'] <= 4].copy()

# Cut-offs read by classify_school: a group must reach the GPA threshold and
# the GRE threshold on both Verbal and Quantitative to earn the label.
great_threshold_gpa = 3.8
mid_level_threshold_gpa = 3.5
great_threshold_gre = 160
mid_level_threshold_gre = 155

def classify_school(row):
    """Return the tier label for one row of averaged scores.

    Args:
        row: Mapping-like object indexable by 'GPA', 'GRE Verbal' and
            'GRE Quantitive' (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        'Great' when GPA and both GRE scores reach the "great" thresholds,
        'Mid-level' when they reach the "mid-level" thresholds, otherwise
        'Worse'. Thresholds are read from the module-level ``*_threshold_*``
        variables at call time.
    """
    def clears(gpa_floor, gre_floor):
        # All three scores must be at or above their floor; `and` keeps the
        # GRE lookups lazy when the GPA check already fails.
        return (
            row['GPA'] >= gpa_floor
            and row['GRE Verbal'] >= gre_floor
            and row['GRE Quantitive'] >= gre_floor
        )

    # Check the stricter tier first so a row gets the best label it earns.
    if clears(great_threshold_gpa, great_threshold_gre):
        return 'Great'
    if clears(mid_level_threshold_gpa, mid_level_threshold_gre):
        return 'Mid-level'
    return 'Worse'

# Label every row, then persist the labelled table.
filtered_data['Classification'] = filtered_data.apply(classify_school, axis=1)
filtered_data.to_csv('output_analyzed.csv')

# One frame per tier, each ordered from highest to lowest mean GPA.
by_gpa_desc = filtered_data.sort_values(by='GPA', ascending=False)
great_schools = by_gpa_desc[by_gpa_desc['Classification'] == 'Great']
mid_level_schools = by_gpa_desc[by_gpa_desc['Classification'] == 'Mid-level']
worse_schools = by_gpa_desc[by_gpa_desc['Classification'] == 'Worse']

# (title, frame) pairs drive both the combined report and the console output,
# so the ordering Great -> Mid-level -> Worse is defined in a single place.
sections = [
    ('Great Schools', great_schools),
    ('Mid-level Schools', mid_level_schools),
    ('Worse Schools', worse_schools),
]

# Stack the tiers into one table; the titles become the outermost index level.
report = pd.concat(
    [frame for _, frame in sections],
    keys=[title for title, _ in sections],
)
report.to_csv('output_report.csv')

# Summary of tier sizes first ...
for title, frame in sections:
    print(f"\nNumber of {title}:", len(frame))

# ... then each tier in full, followed by its size again.
for title, frame in sections:
    print(f"\n--- {title} ---")
    print(frame)
    print(f"\nNumber of {title}:", len(frame))



Number of Great Schools: 22

Number of Mid-level Schools: 86

Number of Worse Schools: 28

--- Great Schools ---
                                                               GPA  \
University                                       Degree              
Florida State University                          PhD     4.000000   
University Of Southern California                 PhD     3.990000   
Southern Methodist University                     PhD     3.953333   
University Of Alabama                             PhD     3.950000   
Princeton University                              PhD     3.927500   
Ohio State University (OSU)                       PhD     3.923333   
Lehigh University                                 PhD     3.892500   
University Of Oregon                              PhD     3.890000   
New York University (NYU)                         PhD     3.885000   
University Of Nevada, Reno                        PhD     3.885000   
University Of Wisconsin, Madison              

  grouped_data = accepted_data.groupby(['University', 'Degree']).filter(lambda x: len(x) >= 10).groupby(['University', 'Degree']).mean()
