In [None]:
import ROOT
import numpy as np
from array import array as ary
import math
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from matplotlib.dates import MonthLocator, DateFormatter
import datetime
import seaborn as sns 

In [None]:
# Load the percentile table, use the first (unnamed) CSV column as the
# 'Question' index and cast every remaining column to float.
df_stat = (
    pd.read_csv('GR-percetile-per-question.csv', skipinitialspace=True)
    .rename(columns={'Unnamed: 0': 'Question'})
    .set_index('Question')
    .astype(float)
)
df_stat

In [None]:
# Split df_stat column-wise: the column labels (used as legend entries by the
# plotting cells) and, in the same order, each column's values as a plain list.
list_leg = list(df_stat.columns)
list_series = [df_stat[name].tolist() for name in list_leg]

In [None]:
# Grouped histogram of the percentile values: one bar series per entry of
# list_series, ten bins over [0, 100].
sns.set()

dims = (15, 4)
fig, ax = plt.subplots(figsize=dims)

# plt.get_cmap (already used by the later cells) instead of plt.cm.get_cmap,
# which was deprecated in Matplotlib 3.7 and removed in 3.9.
colormap = plt.get_cmap('Spectral')  # reverse the colormap --> 'YlGnBu_r'
# one colour per series, evenly spaced over the colormap
ax.set_prop_cycle(plt.cycler('color', colormap(np.linspace(0, 1, len(list_leg)))))

ax.hist(list_series, bins=10, range=[0, 100])
ax.set_xlabel('Percentile', fontsize=25)
ax.set_ylabel('Entries', fontsize=25)
ax.tick_params(axis='x', labelsize=23)
ax.tick_params(axis='y', labelsize=23)
ax.set_xlim([0, 100])
# legend sits outside the axes, to the right
ax.legend(list_leg, loc='center left', bbox_to_anchor=(1, 0.5), fontsize='xx-large')

fig.tight_layout()
fig.savefig('./plot/GR-Percentile-hist.png', dpi=500)
fig.savefig('./plot/GR-Percentile-hist.pdf')
plt.show()

In [None]:
# Stacked panels (one per series): percentile against question number with a
# seaborn linear-regression overlay.
# sns.reset_orig()
# Question numbers 1..100; every list in list_series must hold 100 values,
# otherwise the plot calls below raise on the length mismatch.
x = np.linspace(1, 100, 100)

evenly_spaced_interval = np.linspace(0, 1, len(list_leg))
# loop variable renamed so it no longer shadows the x array above
colors = [plt.cm.Spectral(frac) for frac in evenly_spaced_interval]

# squeeze=False + ravel keeps `axes` a 1-D array even when there is a single
# series (plt.subplots would otherwise return a bare Axes and axes[i] fails).
fig, axes = plt.subplots(len(list_leg), figsize=(7, 5), sharex=True, sharey=True,
                         gridspec_kw={'hspace': 0}, squeeze=False)
axes = axes.ravel()
# frameless axes spanning the whole figure; it only carries the shared labels
label_ax = fig.add_subplot(111, frameon=False)

for i, color in enumerate(colors):
    axes[i].plot(x, list_series[i], color=color)
    axes[i].set(xlim=(0, 101))
    # legend is created while the data line is the only artist on the panel
    axes[i].legend((list_leg[i],), loc='upper right')
    # x/y must be passed by keyword: seaborn >= 0.12 rejects positional data
    sns.regplot(x=x, y=list_series[i], ax=axes[i], color=color)
    axes[i].grid(True)

fig.tight_layout()
# hide tick and tick label of the big axis
label_ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
label_ax.set_xlabel('Question number')
label_ax.set_ylabel('Percentile')

fig.savefig('./plot/GR-Percentile-line.png', dpi=500)
fig.savefig('./plot/GR-Percentile-line.pdf')
plt.show()


In [None]:
# Regression lines only (scatter points drawn with size 0), all series on one
# axes. Relies on `x` (question numbers) defined by the line-panel cell.
dims = (7, 6)

fig, ax = plt.subplots(figsize=dims)
# plt.get_cmap instead of plt.cm.get_cmap (removed in Matplotlib 3.9)
colormap = plt.get_cmap('Spectral')  # reverse the colormap --> 'YlGnBu_r'
ax.set_prop_cycle(plt.cycler('color', colormap(np.linspace(0, 1, len(list_leg)))))

for ilist in range(len(list_leg)):
    # x/y by keyword (seaborn >= 0.12 rejects positional data). The label is
    # attached to the fitted line itself so the legend does not depend on the
    # order in which regplot adds its scatter / line / confidence-band artists.
    sns.regplot(x=x, y=list_series[ilist], ax=ax,
                scatter_kws={'s': 0},
                line_kws={'label': list_leg[ilist]})

ax.set_xlabel('Question', fontsize=23)
ax.set_ylabel('Percentile', fontsize=23)
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlim([0, 100])
ax.set_ylim([20, 70])
ax.legend(loc='best', fontsize='x-large')

fig.tight_layout()
fig.savefig('./plot/GR-Percentile-regplot-onlyfit.png', dpi=500)
fig.savefig('./plot/GR-Percentile-regplot-onlyfit.pdf')
plt.show()

In [None]:
import joypy

In [None]:
# sns.reset_orig()
# Joyplot of df_stat, coloured with the Spectral colormap.
fig, axes = joypy.joyplot(df_stat, figsize=(6, 5), colormap=plt.cm.Spectral)

# Only the last axes returned by joyplot is styled: it gets the x label, a
# visible x axis and no y ticks.
ax = axes[-1]
ax.set_xlabel('Percentile')
# ax.tick_params(axis='x', labelsize=23)
ax.xaxis.set_visible(True)
ax.yaxis.set_ticks([])

# plt.tight_layout()
plt.savefig('./plot/GR-Percentile-joyplot.png', dpi=500)
plt.savefig('./plot/GR-Percentile-joyplot.pdf')
plt.show()

In [None]:
# One pie chart per practice set showing how its questions split across the
# subjects in `labels` (no percentile cut).
list_PracSet = ['GR8677', 'GR9277', 'GR9677', 'GR0177', 'GR0877', 'GR1777']
labels = ['Mechanics',
          'Electromagnetism',
          'Quantum mechanics',
          'Thermodynamics and statistical mechanics',
          'Optics and wave',
          'Special relativity',
          'Atomic physics',
          'Lab methods',
          'Specialized topics']
list_df = []

cmap = plt.get_cmap('Spectral')
# one colour per subject, evenly spaced over the colormap
colors = [cmap(i) for i in np.linspace(0, 1, len(labels))]

for prac_set in list_PracSet:
    # index by question and make sure the percentile column is numeric
    df_set = pd.read_csv(prac_set + '.csv', skipinitialspace=True).set_index('Question')
    df_set['Percentile'] = df_set['Percentile'].astype(float)
    list_df.append(df_set)

    # Number of questions per subject, in the order of `labels`.
    subject = df_set['Subject_type']
    LSplit = []
    for label in labels:
        if label == 'Specialized topics':
            # this subject is matched by substring, every other one exactly;
            # na=False treats a missing subject as "no match"
            in_subject = subject.str.contains(label, case=True, regex=True, na=False)
        else:
            in_subject = subject == label
        # count() ignores rows whose percentile is missing
        LSplit.append(df_set.loc[in_subject, 'Percentile'].count())

    fig = plt.figure(figsize=(6, 6))

    # wedge labels are left empty; the subjects are named in the legend instead
    LegendPie = plt.pie(LSplit, labels=[''] * len(labels),
                        autopct='%1.1f%%', colors=colors, shadow=False, startangle=90,
                        pctdistance=0.8, labeldistance=1.2,
                        textprops={'fontsize': 16})
    plt.title(prac_set + ' (Percentile all)', fontsize=18)
    plt.legend(labels, bbox_to_anchor=(1.25, 0.5), loc="center right",
               bbox_transform=fig.transFigure)

    plt.tight_layout()
    plt.savefig('./plot/' + prac_set + '-Subject-Pie.png', bbox_inches='tight', dpi=500)
    plt.savefig('./plot/' + prac_set + '-Subject-Pie.pdf', bbox_inches='tight')
    plt.show()
    # release the figure so repeated runs do not accumulate open figures
    plt.close(fig)
    

In [None]:
# Subject-composition pie charts restricted to questions whose percentile is
# below a cut, for cuts 90, 80, ..., 30 and every practice set. The figures
# are only written to ./plot/, not displayed.
# Pcut = 50
list_PracSet = ['GR8677', 'GR9277', 'GR9677', 'GR0177', 'GR0877', 'GR1777']
labels = ['Mechanics',
          'Electromagnetism',
          'Quantum mechanics',
          'Thermodynamics and statistical mechanics',
          'Optics and wave',
          'Special relativity',
          'Atomic physics',
          'Lab methods',
          'Specialized topics']

cmap = plt.get_cmap('Spectral')
# one colour per subject, evenly spaced over the colormap
colors = [cmap(i) for i in np.linspace(0, 1, len(labels))]

# Read every practice set once; the cut loop below only filters these frames
# (the files do not depend on Pcut, so re-reading them per cut was wasted I/O).
list_df = []
for prac_set in list_PracSet:
    df_set = pd.read_csv(prac_set + '.csv', skipinitialspace=True).set_index('Question')
    df_set['Percentile'] = df_set['Percentile'].astype(float)
    list_df.append(df_set)

for Pcut in range(90, 20, -10):
    print(Pcut)
    for prac_set, df_set in zip(list_PracSet, list_df):
        subject = df_set['Subject_type']
        below_cut = df_set['Percentile'] < Pcut

        # Number of questions below the cut per subject, in the order of `labels`.
        LSplit = []
        for label in labels:
            if label == 'Specialized topics':
                # this subject is matched by substring, every other one exactly;
                # na=False treats a missing subject as "no match"
                in_subject = subject.str.contains(label, case=True, regex=True, na=False)
            else:
                in_subject = subject == label
            # count() ignores rows whose percentile is missing
            LSplit.append(df_set.loc[in_subject & below_cut, 'Percentile'].count())

        if sum(LSplit) == 0:
            # nothing below this cut: a pie of all zeros cannot be normalised
            print('%s: no question with Percentile < %d, pie skipped' % (prac_set, Pcut))
            continue

        fig = plt.figure(figsize=(6, 6))

        # wedge labels are left empty; the subjects are named in the legend instead
        LegendPie = plt.pie(LSplit, labels=[''] * len(labels),
                            autopct='%1.1f%%', colors=colors, shadow=False, startangle=90,
                            pctdistance=0.8, labeldistance=1.2,
                            textprops={'fontsize': 16})
        plt.title(prac_set + ' (Percentile < %d)' % Pcut, fontsize=18)
        plt.legend(labels, bbox_to_anchor=(1.25, 0.5), loc="center right",
                   bbox_transform=fig.transFigure)

        plt.tight_layout()
        plt.savefig('./plot/' + prac_set + '-Subject-Pie-Pless%d.png' % Pcut, bbox_inches='tight', dpi=500)
        plt.savefig('./plot/' + prac_set + '-Subject-Pie-Pless%d.pdf' % Pcut, bbox_inches='tight')
        # plt.show()
        # The figure is never shown, so close it explicitly; otherwise all
        # 7 cuts x 6 sets figures stay open in memory.
        plt.close(fig)
    

In [None]:
# For every practice set and every percentile cut (100, 90, ..., 30) compute
# the fraction of the questions below the cut that belongs to each subject.
# Result: list_<XX>_frac[set_index][cut_index], one list per subject.
list_PracSet = ['GR1777', 'GR0877', 'GR0177', 'GR9677', 'GR9277', 'GR8677']
labels = ['Mechanics',
          'Electromagnetism',
          'Quantum mechanics',
          'Thermodynamics and statistical mechanics',
          'Optics and wave',
          'Special relativity',
          'Atomic physics',
          'Lab methods',
          'Specialized topics']
list_df = []

cmap = plt.get_cmap('Spectral')
colors = [cmap(i) for i in np.linspace(0, 1, len(labels))]

# One accumulator per subject, in the order of `labels`; each receives one
# list of fractions (one entry per cut) per practice set.
fracs_by_subject = [[] for _ in labels]

for prac_set in list_PracSet:
    # Read the file once per practice set; the cut loop only filters it.
    df_set = pd.read_csv(prac_set + '.csv', skipinitialspace=True).set_index('Question')
    df_set['Percentile'] = df_set['Percentile'].astype(float)
    # same end state as before: list_df holds only the most recently read frame
    list_df = [df_set]

    subject = df_set['Subject_type']
    subject_masks = []
    for label in labels:
        if label == 'Specialized topics':
            # this subject is matched by substring, every other one exactly;
            # na=False treats a missing subject as "no match"
            subject_masks.append(subject.str.contains(label, case=True, regex=True, na=False))
        else:
            subject_masks.append(subject == label)

    fracs_this_set = [[] for _ in labels]
    for Pcut in range(100, 20, -10):
        print(prac_set, Pcut)
        below_cut = df_set['Percentile'] < Pcut
        # count() ignores rows whose percentile is missing
        counts = [df_set.loc[mask & below_cut, 'Percentile'].count() for mask in subject_masks]
        # shared denominator: all categorised questions below the cut
        total = sum(counts)
        for per_cut, count in zip(fracs_this_set, counts):
            # no question below the cut -> the fraction is undefined
            per_cut.append(count / total if total else float('nan'))

    for per_subject, per_cut in zip(fracs_by_subject, fracs_this_set):
        per_subject.append(per_cut)

# Names consumed by the following cells (same order as `labels`).
(list_CM_frac, list_EM_frac, list_QM_frac,
 list_TS_frac, list_WO_frac, list_SR_frac,
 list_AP_frac, list_LM_frac, list_ST_frac) = fracs_by_subject
    

In [None]:
# list_CM_frac
column_names = ['GR1777','GR0877','GR0177','GR9677','GR9277','GR8677']
list_pless = ['P<100','P<90','P<80','P<70','P<60','P<50','P<40','P<30']
df_CM_frac = pd.DataFrame(columns = column_names)
df_EM_frac = pd.DataFrame(columns = column_names)
df_QM_frac = pd.DataFrame(columns = column_names)
df_TS_frac = pd.DataFrame(columns = column_names)
df_WO_frac = pd.DataFrame(columns = column_names)
df_SR_frac = pd.DataFrame(columns = column_names)
df_AP_frac = pd.DataFrame(columns = column_names)
df_LM_frac = pd.DataFrame(columns = column_names)
df_ST_frac = pd.DataFrame(columns = column_names)

for i in range(len(column_names)):
    df_CM_frac[column_names[i]] = list_CM_frac[i]
    df_EM_frac[column_names[i]] = list_EM_frac[i]
    df_QM_frac[column_names[i]] = list_QM_frac[i]
    df_TS_frac[column_names[i]] = list_TS_frac[i]
    df_WO_frac[column_names[i]] = list_WO_frac[i]
    df_SR_frac[column_names[i]] = list_SR_frac[i]
    df_AP_frac[column_names[i]] = list_AP_frac[i]
    df_LM_frac[column_names[i]] = list_LM_frac[i]
    df_ST_frac[column_names[i]] = list_ST_frac[i]
    
df_CM_frac['Percentile'] = list_pless
df_EM_frac['Percentile'] = list_pless
df_QM_frac['Percentile'] = list_pless
df_TS_frac['Percentile'] = list_pless
df_WO_frac['Percentile'] = list_pless
df_SR_frac['Percentile'] = list_pless
df_AP_frac['Percentile'] = list_pless
df_LM_frac['Percentile'] = list_pless
df_ST_frac['Percentile'] = list_pless

df_CM_frac.set_index('Percentile' , inplace=True)
df_EM_frac.set_index('Percentile' , inplace=True)
df_QM_frac.set_index('Percentile' , inplace=True)
df_TS_frac.set_index('Percentile' , inplace=True)
df_WO_frac.set_index('Percentile' , inplace=True)
df_SR_frac.set_index('Percentile' , inplace=True)
df_AP_frac.set_index('Percentile' , inplace=True)
df_LM_frac.set_index('Percentile' , inplace=True)
df_ST_frac.set_index('Percentile' , inplace=True)

df_frac=[]
df_frac.append(df_CM_frac)
df_frac.append(df_EM_frac)
df_frac.append(df_QM_frac)
df_frac.append(df_TS_frac)
df_frac.append(df_WO_frac)
df_frac.append(df_SR_frac)
df_frac.append(df_AP_frac)
df_frac.append(df_LM_frac)
df_frac.append(df_ST_frac)

list_frac=[]
list_frac.append(list_CM_frac)
list_frac.append(list_EM_frac)
list_frac.append(list_QM_frac)
list_frac.append(list_TS_frac)
list_frac.append(list_WO_frac)
list_frac.append(list_SR_frac)
list_frac.append(list_AP_frac)
list_frac.append(list_LM_frac)
list_frac.append(list_ST_frac)

df_CM_frac

In [None]:
# One figure per subject: fraction of questions below each percentile cut,
# one line per practice set. Figures are written to ./plot/ and not displayed.
labels = ['Mechanics',
          'Electromagnetism',
          'Quantum mechanics',
          'Thermodynamics and statistical mechanics',
          'Optics and wave',
          'Special relativity',
          'Atomic physics',
          'Lab methods',
          'Specialized topics']

for ilabel in range(len(labels)):
    # df_frac is ordered like `labels`; everything drawn below (x values,
    # lines, legend entries) comes from this one frame so they cannot drift
    # apart from each other.
    frac_table = df_frac[ilabel]

    dims = (11, 6)
    fig, ax = plt.subplots(figsize=dims)

    # plt.get_cmap instead of plt.cm.get_cmap (removed in Matplotlib 3.9)
    colormap = plt.get_cmap('Spectral')  # reverse the colormap --> 'YlGnBu_r'
    # one colour per plotted column
    ax.set_prop_cycle(plt.cycler('color', colormap(np.linspace(0, 1, len(frac_table.columns)))))

    for prac_set in frac_table.columns:
        ax.plot(frac_table.index, frac_table[prac_set], '-o',
                linewidth=3, markersize=10, label=prac_set)

    # ax.set_xlabel('Percentile',fontsize=25)
    ax.set_ylabel('Fraction', fontsize=25)
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=23)
    ax.set_title(labels[ilabel], fontsize=25)
    # legend entries are the column names attached to the lines above
    ax.legend(loc='center left', bbox_to_anchor=(1.1, 0.5), fontsize='xx-large')

    fig.tight_layout()
    fig.savefig('./plot/GR-Percentile-fraction-' + labels[ilabel] + '.png', dpi=500)
    fig.savefig('./plot/GR-Percentile-fraction-' + labels[ilabel] + '.pdf')
    # plt.show()
    # never shown, so close explicitly to avoid accumulating open figures
    plt.close(fig)


In [None]:
# pd.set_option("display.max_rows", 10)
# Show every row of displayed frames. The fully qualified key is required:
# the bare pattern "max_rows" is ambiguous on pandas versions that also define
# "styler.render.max_rows", and set_option then raises OptionError.
pd.set_option("display.max_rows", None)
# NOTE(review): list_df_Pless is not defined in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel -- confirm where
# it is supposed to come from or drop the cell.
list_df_Pless[2]