Import relevant Libraries

In [1]:
import pandas as pd
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
sns.set_style('whitegrid')

Read all files

In [2]:
# Major file, inside here we have id_major	id_university	type	major_name	capacity columns
majors = pd.read_csv('./files/majors.csv')
print(majors.isna().sum()) #check for some null 
majors.sample(5)

Unnamed: 0       0
id_major         0
id_university    0
type             0
major_name       0
capacity         0
dtype: int64


Unnamed: 0.1,Unnamed: 0,id_major,id_university,type,major_name,capacity
1512,1512,7511081,751,science,FISIKA,28
1934,1934,1432087,143,humanities,FOTOGRAFI,54
1726,1726,1112184,111,humanities,SOSIOLOGI,48
2666,2666,3852065,385,humanities,ILMU KOMUNIKASI,140
156,156,1311167,131,science,TEKNIK KIMIA,48


In [3]:
# we have id_university and the name of the university 
universities = pd.read_csv('./files/universities.csv')
print(universities.isna().sum()) #check some null 
universities.sample(5)

Unnamed: 0         0
id_university      0
university_name    0
dtype: int64


Unnamed: 0.1,Unnamed: 0,id_university,university_name
63,63,611,UNIVERSITAS UDAYANA
46,46,365,UNIVERSITAS ISLAM NEGERI SUNAN KALIJAGA
61,61,542,INSTITUT TEKNOLOGI KALIMANTAN
25,25,323,UNIVERSITAS NEGERI JAKARTA
23,23,321,UNIVERSITAS INDONESIA


In [4]:
# in here we have all the test score from humanities
score_humanities = pd.read_csv('./files/score_humanities.csv')
print(score_humanities.isna().sum()) #check some null 
score_humanities.sample(5)

Unnamed: 0              0
id_first_major          0
id_first_university     0
id_second_major         0
id_second_university    0
id_user                 0
score_eko               0
score_geo               0
score_kmb               0
score_kpu               0
score_kua               0
score_mat               0
score_ppu               0
score_sej               0
score_sos               0
dtype: int64


Unnamed: 0.1,Unnamed: 0,id_first_major,id_first_university,id_second_major,id_second_university,id_user,score_eko,score_geo,score_kmb,score_kpu,score_kua,score_mat,score_ppu,score_sej,score_sos
18131,18131,3722274,372,3722297,372,122545,708,579,674,543,427,533,465,648,591
38512,38512,3342517,334,3232054,323,231561,599,605,503,605,544,535,414,485,511
27533,27533,1712136,171,1712183,171,181492,419,273,382,323,499,377,525,325,345
15177,15177,3332345,333,3332337,333,104951,601,602,365,537,444,492,412,444,354
16058,16058,3722235,372,6112064,611,109925,521,550,603,477,555,647,545,523,516


In [5]:
# we have all the test score from science
score_science = pd.read_csv('./files/score_science.csv')
print(score_science.isna().sum()) #check some null 
score_science.sample(5)

Unnamed: 0              0
id_first_major          0
id_first_university     0
id_second_major         0
id_second_university    0
id_user                 0
score_bio               0
score_fis               0
score_kim               0
score_kmb               0
score_kpu               0
score_kua               0
score_mat               0
score_ppu               0
dtype: int64


Unnamed: 0.1,Unnamed: 0,id_first_major,id_first_university,id_second_major,id_second_university,id_user,score_bio,score_fis,score_kim,score_kmb,score_kpu,score_kua,score_mat,score_ppu
32327,32327,3851046,385,3551221,355,135697,621,471,552,484,624,656,581,430
36916,36916,3561134,356,3561181,356,155359,641,682,690,616,623,692,452,746
12433,12433,1311032,131,1421027,142,60337,408,526,415,541,686,476,518,641
44182,44182,3611074,361,5311337,531,186707,579,473,492,614,609,595,459,535
31818,31818,7211015,721,1111181,111,133639,448,431,444,490,510,537,476,516


there is no missing data in any of these files, so we can continue to merge some files

In [6]:
major_univ = pd.merge(left=universities[['id_university','university_name']],right=majors[['id_major','major_name','id_university','capacity']],on='id_university')
major_univ # we have merged our data, so it will be easier to be read

Unnamed: 0,id_university,university_name,id_major,major_name,capacity
0,111,UNIVERSITAS SYIAH KUALA,1111014,PENDIDIKAN DOKTER HEWAN,88
1,111,UNIVERSITAS SYIAH KUALA,1111022,TEKNIK SIPIL,64
2,111,UNIVERSITAS SYIAH KUALA,1111037,TEKNIK MESIN,48
3,111,UNIVERSITAS SYIAH KUALA,1111045,TEKNIK KIMIA,48
4,111,UNIVERSITAS SYIAH KUALA,1111053,ARSITEKTUR,48
...,...,...,...,...,...
3162,921,UNIVERSITAS PAPUA,9212011,EKONOMI PEMBANGUNAN,32
3163,921,UNIVERSITAS PAPUA,9212042,MANAJEMEN,32
3164,921,UNIVERSITAS PAPUA,9212057,AKUNTANSI,32
3165,921,UNIVERSITAS PAPUA,9212065,PENDIDIKAN BAHASA INDONESIA,16


for science data

In [7]:
df_science = pd.merge(left=score_science,right=major_univ,left_on=['id_first_major','id_first_university'],right_on=['id_major','id_university'],suffixes=('_left','_right')) # merge first major and first university
df_science = df_science.rename(columns={'university_name':'first_univ','major_name':'first_major','capacity':'first_capacity'}).drop(['id_university','id_major'],axis=1) # then rename the columns name that have been merged and also drop the id
df_science = pd.merge(left=df_science,right=major_univ,left_on=['id_second_major','id_second_university'],right_on=['id_major','id_university'],suffixes=('_left','_right')) #  now second major and second university
df_science = df_science.rename(columns={'university_name':'second_univ','major_name':'second_major','capacity':'second_capacity'}).drop(['id_university','id_major','Unnamed: 0','id_first_major','id_first_university','id_second_major','id_second_university','id_user'],axis=1) # same like before

In [8]:
df_science # now we can just use a single file to get all of the information

Unnamed: 0,score_bio,score_fis,score_kim,score_kmb,score_kpu,score_kua,score_mat,score_ppu,first_univ,first_major,first_capacity,second_univ,second_major,second_capacity
0,400,400,400,400,400,400,400,400,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,64,UNIVERSITAS PADJADJARAN,PSIKOLOGI,80
1,704,447,630,561,518,541,585,599,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,64,UNIVERSITAS PADJADJARAN,PSIKOLOGI,80
2,715,532,633,579,732,804,402,608,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,64,UNIVERSITAS PADJADJARAN,PSIKOLOGI,80
3,446,511,400,548,679,567,513,538,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,64,UNIVERSITAS PADJADJARAN,PSIKOLOGI,80
4,489,533,367,481,487,544,499,469,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,64,UNIVERSITAS PADJADJARAN,PSIKOLOGI,80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86223,336,466,476,511,378,393,589,470,UNIVERSITAS NEGERI GORONTALO,PENDIDIKAN GEOGRAFI,54,UNIVERSITAS NEGERI GORONTALO,PENDIDIKAN ILMU PENGETAHUAN ALAM,27
86224,520,483,654,545,606,722,425,495,UNIVERSITAS BORNEO TARAKAN,AKUAKULTUR,48,UNIVERSITAS BORNEO TARAKAN,AGRIBISNIS,48
86225,435,413,441,408,516,526,564,390,UNIVERSITAS SYIAH KUALA,AGROTEKNOLOGI (PSDKU GAYO LUES),20,UNIVERSITAS SYIAH KUALA,KEHUTANAN (PSDKU GAYO LUES),16
86226,482,516,453,546,569,597,587,633,UNIVERSITAS PALANGKARAYA,KIMIA,30,UNIVERSITAS PALANGKARAYA,FISIKA,30


In [9]:
# because the capacity of utbk is 40% from the maximum capacity so we need to change the capacity value
df_science['first_capacity'] = df_science['first_capacity'].apply(lambda x: int(x*40/100))
df_science['second_capacity'] = df_science['second_capacity'].apply(lambda x: int(x*40/100))

In [10]:
df_science['specialized_score']=df_science[['score_bio','score_fis','score_kim','score_mat']].mean(axis=1) #get avg score 
df_science['general_score'] = df_science[['score_kmb','score_kpu','score_kua','score_ppu']].mean(axis=1)  #get avg score
df_science['average_score'] = df_science.iloc[:,:8].mean(axis=1) # avg score

In [11]:
df_science.columns.values

array(['score_bio', 'score_fis', 'score_kim', 'score_kmb', 'score_kpu',
       'score_kua', 'score_mat', 'score_ppu', 'first_univ', 'first_major',
       'first_capacity', 'second_univ', 'second_major', 'second_capacity',
       'specialized_score', 'general_score', 'average_score'],
      dtype=object)

In [12]:
df_science = df_science[['score_bio', 'score_fis', 'score_kim','score_mat', 'score_kmb', 'score_kpu','score_kua', 'score_ppu','specialized_score','general_score','average_score','first_univ', 'first_major','first_capacity', 'second_univ', 'second_major', 'second_capacity']] #rearrange columns index 

In [13]:
df_science

Unnamed: 0,score_bio,score_fis,score_kim,score_mat,score_kmb,score_kpu,score_kua,score_ppu,specialized_score,general_score,average_score,first_univ,first_major,first_capacity,second_univ,second_major,second_capacity
0,400,400,400,400,400,400,400,400,400.00,400.00,400.000,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,25,UNIVERSITAS PADJADJARAN,PSIKOLOGI,32
1,704,447,630,585,561,518,541,599,591.50,554.75,573.125,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,25,UNIVERSITAS PADJADJARAN,PSIKOLOGI,32
2,715,532,633,402,579,732,804,608,570.50,680.75,625.625,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,25,UNIVERSITAS PADJADJARAN,PSIKOLOGI,32
3,446,511,400,513,548,679,567,538,467.50,583.00,525.250,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,25,UNIVERSITAS PADJADJARAN,PSIKOLOGI,32
4,489,533,367,499,481,487,544,469,472.00,495.25,483.625,INSTITUT TEKNOLOGI BANDUNG,SEKOLAH ILMU & TEKNO. HAYATI - PROG. SAINS,25,UNIVERSITAS PADJADJARAN,PSIKOLOGI,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86223,336,466,476,589,511,378,393,470,466.75,438.00,452.375,UNIVERSITAS NEGERI GORONTALO,PENDIDIKAN GEOGRAFI,21,UNIVERSITAS NEGERI GORONTALO,PENDIDIKAN ILMU PENGETAHUAN ALAM,10
86224,520,483,654,425,545,606,722,495,520.50,592.00,556.250,UNIVERSITAS BORNEO TARAKAN,AKUAKULTUR,19,UNIVERSITAS BORNEO TARAKAN,AGRIBISNIS,19
86225,435,413,441,564,408,516,526,390,463.25,460.00,461.625,UNIVERSITAS SYIAH KUALA,AGROTEKNOLOGI (PSDKU GAYO LUES),8,UNIVERSITAS SYIAH KUALA,KEHUTANAN (PSDKU GAYO LUES),6
86226,482,516,453,587,546,569,597,633,509.50,586.25,547.875,UNIVERSITAS PALANGKARAYA,KIMIA,12,UNIVERSITAS PALANGKARAYA,FISIKA,12


for humanities data

In [14]:
df_humanites = pd.merge(left=score_humanities,right=major_univ,left_on=['id_first_major','id_first_university'],right_on=['id_major','id_university'],suffixes=('_left','_right')) # merge first major and first university
df_humanites = df_humanites.rename(columns={'university_name':'first_univ','major_name':'first_major','capacity':'first_capacity'}).drop(['id_university','id_major'],axis=1) # then rename the columns name that have been merged and also drop the id
df_humanites = pd.merge(left=df_humanites,right=major_univ,left_on=['id_second_major','id_second_university'],right_on=['id_major','id_university'],suffixes=('_left','_right')) #  now second major and second university
df_humanites = df_humanites.rename(columns={'university_name':'second_univ','major_name':'second_major','capacity':'second_capacity'}).drop(['id_university','id_major','Unnamed: 0','id_first_major','id_first_university','id_second_major','id_second_university','id_user'],axis=1) # same like before

In [15]:
df_humanites.head(3)

Unnamed: 0,score_eko,score_geo,score_kmb,score_kpu,score_kua,score_mat,score_ppu,score_sej,score_sos,first_univ,first_major,first_capacity,second_univ,second_major,second_capacity
0,778,486,679,594,643,686,559,392,676,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),100,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,30
1,803,638,659,696,685,494,576,676,694,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),100,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,30
2,601,560,575,611,629,509,607,775,629,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),100,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,30


In [16]:
# because the capacity of utbk is 40% from the maximum capacity so we need to change the capacity value
df_humanites['first_capacity'] = df_humanites['first_capacity'].apply(lambda x: int(x*40/100))
df_humanites['second_capacity'] = df_humanites['second_capacity'].apply(lambda x: int(x*40/100))

In [17]:
df_humanites.columns

Index(['score_eko', 'score_geo', 'score_kmb', 'score_kpu', 'score_kua',
       'score_mat', 'score_ppu', 'score_sej', 'score_sos', 'first_univ',
       'first_major', 'first_capacity', 'second_univ', 'second_major',
       'second_capacity'],
      dtype='object')

In [18]:
df_humanites['specialized_score']=df_humanites[['score_eko','score_geo','score_sej','score_sos']].mean(axis=1) #get avg score 
df_humanites['general_score'] = df_humanites[['score_kmb','score_kpu','score_kua','score_ppu','score_mat']].mean(axis=1)  #get avg score
df_humanites['average_score'] = df_humanites.iloc[:,:8].mean(axis=1) # avg score

In [19]:
df_humanites = df_humanites[['score_eko', 'score_geo', 'score_sej','score_sos','score_mat', 'score_kmb', 'score_kpu','score_kua', 'score_ppu','specialized_score','general_score','average_score','first_univ', 'first_major','first_capacity', 'second_univ', 'second_major', 'second_capacity']] #rearrange columns index 

In [20]:
df_humanites.head()

Unnamed: 0,score_eko,score_geo,score_sej,score_sos,score_mat,score_kmb,score_kpu,score_kua,score_ppu,specialized_score,general_score,average_score,first_univ,first_major,first_capacity,second_univ,second_major,second_capacity
0,778,486,392,676,686,679,594,643,559,583.0,632.2,602.125,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),40,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,12
1,803,638,676,694,494,659,696,685,576,702.75,622.0,653.375,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),40,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,12
2,601,560,775,629,509,575,611,629,607,641.25,586.2,608.375,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),40,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,12
3,612,494,617,548,548,524,549,589,508,567.75,543.6,555.125,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),40,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,12
4,493,573,474,560,641,586,582,504,608,525.0,584.2,557.625,INSTITUT TEKNOLOGI BANDUNG,FAKULTAS SENIRUPA DAN DESAIN (FSRD),40,UNIVERSITAS SEBELAS MARET,DESAIN KOMUNIKASI VISUAL,12


visualization

In [98]:
a = df_science.drop(['second_univ',	'second_major',	'second_capacity'],axis=1).groupby('first_univ').size().sort_values(ascending=False).reset_index().head(10).rename(columns={0:'Total','first_univ':'Universities'}) #DF for science

b = df_humanites.drop(['second_univ',	'second_major',	'second_capacity'],axis=1).groupby('first_univ').size().sort_values(ascending=False).reset_index().head(10).rename(columns={0:'Total','first_univ':'Universities'}) #DF for humanit

In [104]:
fig = px.bar(data_frame=a,y='Universities',x='Total',color='Universities',text_auto=True,pattern_shape_sequence=["\\"],width=1300, color_discrete_sequence=px.colors.qualitative.Bold)
fig.update_layout(title='Top Universities Choices 2019 Based on First Choice for Science Majors')
fig.update_yaxes(tickangle=0, tickfont=dict(family='Rockwell', color='black', size=12))
fig.update_xaxes(tickangle=0, tickfont=dict(family='Rockwell', color='red', size=12))
fig.update_traces(textfont_size=11, textangle=0, textposition="outside", cliponaxis=True)
fig.show()

fig = px.bar(data_frame=b,y='Universities',x='Total',color='Universities',text_auto=True,pattern_shape_sequence=["/"],width=1300)
fig.update_layout(title='Top Universities Choices 2019 Based on First Choice for Humanities Majors')
fig.update_yaxes(tickangle=0, tickfont=dict(family='Rockwell', color='black', size=12))
fig.update_xaxes(tickangle=0, tickfont=dict(family='Rockwell', color='red', size=12))
fig.update_traces(textfont_size=11, textangle=0, textposition="outside", cliponaxis=True)
fig.show()

![Top Universities Choices 2019 Based on First Choice for Science Majors](photos/Top%20Universities%20Choices%202019%20Based%20on%20First%20Choice%20for%20Science%20Majors.png)`

![Top Universities Choices 2019 Based on First Choice for Humanities Majors](photos/Top%20Universities%20Choices%202019%20Based%20on%20First%20Choice%20for%20Humanities%20Majors.png)

In [25]:
df_univ_braw = df_science.loc[(df_science['first_univ']=='UNIVERSITAS BRAWIJAYA')].drop(['second_univ',	'second_major'	,'second_capacity'],axis=1)
df_univ_braw.head()

Unnamed: 0,score_bio,score_fis,score_kim,score_mat,score_kmb,score_kpu,score_kua,score_ppu,specialized_score,general_score,average_score,first_univ,first_major,first_capacity
89,721,559,548,524,635,614,630,588,588.0,616.75,602.375,UNIVERSITAS BRAWIJAYA,TEKNIK KOMPUTER,25
91,497,543,470,515,625,562,635,587,506.25,602.25,554.25,UNIVERSITAS BRAWIJAYA,PENDIDIKAN DOKTER,40
92,585,625,625,389,670,559,570,405,556.0,551.0,553.5,UNIVERSITAS BRAWIJAYA,PENDIDIKAN DOKTER,40
93,755,756,650,488,741,592,743,522,662.25,649.5,655.875,UNIVERSITAS BRAWIJAYA,PENDIDIKAN DOKTER,40
94,652,588,444,587,815,633,720,454,567.75,655.5,611.625,UNIVERSITAS BRAWIJAYA,PENDIDIKAN DOKTER,40


In [76]:
a = df_univ_braw.groupby('first_major').agg({'first_univ':'count','first_capacity':'mean'}).sort_values('first_univ',ascending=True).reset_index().rename(columns={'first_univ':'Total choices','first_capacity':'Capacity'})
a.head()

Unnamed: 0,first_major,Total choices,Capacity
0,SOSIAL EKONOMI PERIKANAN (PSDKU KEDIRI),20,8.0
1,PETERNAKAN (PSDKU KEDIRI),20,6.0
2,AKUAKULTUR (PSDKU KEDIRI),32,8.0
3,AGRIBISNIS (PSDKU KEDIRI),35,8.0
4,AGROEKOTEKNOLOGI (PSDKU KEDIRI),36,8.0


In [96]:
fig = px.bar(data_frame=a,y='first_major',x=['Capacity','Total choices'],barmode='group',height=1400,text_auto=True,pattern_shape_sequence=["x"],)
fig.update_layout(title='UB Science majors based on the first choice 2019',xaxis_title='',yaxis_title='Majors')
fig.update_traces(textfont_size=36, textangle=0, textposition="outside", cliponaxis=False)
fig.show()

![Science Major UB Top Choices 2019](/photos/Science%20Major%20UB%20Top%20Choices%202019.png)

In [28]:
df_univ_braw.columns

Index(['score_bio', 'score_fis', 'score_kim', 'score_mat', 'score_kmb',
       'score_kpu', 'score_kua', 'score_ppu', 'specialized_score',
       'general_score', 'average_score', 'first_univ', 'first_major',
       'first_capacity'],
      dtype='object')

In [29]:
def find_the_lowest_score(df):
    df_out = pd.DataFrame()
    for key,subdf in df.sort_values('average_score',ascending=False).groupby('first_major'):
        capacity = subdf['first_capacity'].iloc[0]
        data = subdf['average_score'].iloc[:capacity]
        data_specialized_score = subdf['specialized_score'].iloc[:capacity]
        data_general_score = subdf['general_score'].iloc[:capacity]
        major = subdf['first_major'].iloc[:capacity]
        data = pd.concat([data_general_score,data_specialized_score,data,major,],axis=1)
        t = pd.DataFrame(data)
        df_out = pd.concat([df_out,t],ignore_index=True)
    return df_out

In [30]:
df_univ_braw_clear = find_the_lowest_score(df_univ_braw)
df_univ_braw_clear

Unnamed: 0,general_score,specialized_score,average_score,first_major
0,609.50,671.75,640.625,AGRIBISNIS
1,622.75,603.25,613.000,AGRIBISNIS
2,660.25,565.00,612.625,AGRIBISNIS
3,613.50,610.50,612.000,AGRIBISNIS
4,567.75,655.25,611.500,AGRIBISNIS
...,...,...,...,...
1318,678.75,540.00,609.375,TEKNOLOGI INFORMASI
1319,607.75,610.25,609.000,TEKNOLOGI INFORMASI
1320,611.75,600.00,605.875,TEKNOLOGI INFORMASI
1321,680.50,531.00,605.750,TEKNOLOGI INFORMASI


In [31]:
fig = go.Figure()
fig.add_trace(go.Box(y=df_univ_braw_clear['specialized_score'], name='specialized_score',
                marker_color = 'indianred'))
fig.add_trace(go.Box(y=df_univ_braw_clear['general_score'], name = 'general_score',
                marker_color = 'lightseagreen'))
fig.add_trace(go.Box(y=df_univ_braw_clear['average_score'], name = 'average_score',
                marker_color = 'mediumvioletred'))
fig.update_layout(title_text='UB Distribution scores 2019')
fig.show()

![UB Distribution scores 2019](/photos/UB%20Distribution%20scores%202019.png)

In [32]:
fig = px.bar(data_frame=df_univ_braw_clear.groupby('first_major').mean().reset_index().sort_values('average_score',ascending=False),x='average_score',y='first_major',color='first_major',height=1200,width=1200,text_auto=True)
fig.update_layout(showlegend=False,title='Avg Score in Science Majors UB 2019',xaxis_title='Avg Score',yaxis_title='Majors',bargap=0.15,
bargroupgap=0.1)
# fig.update_traces(textfont_size=16, textangle=0, textposition="outside", cliponaxis=False)
fig.show()

![Avg Score in Science Majors UB 2019](/photos/Avg%20Score%20in%20Science%20Majors%20UB%202019.png)

In [33]:
fig = px.bar(data_frame=df_univ_braw_clear.groupby('first_major').mean().sort_values('average_score',ascending=False).head(5).reset_index(),x=['average_score','specialized_score','general_score'],y='first_major',text_auto=True,barmode='group',height=500)

fig.update_layout(showlegend=True,title='Top 5 Avg Score Majors in UB 2019',xaxis_title='Avg Scores',yaxis_title='Majors',bargap=0.15,
bargroupgap=0.1,yaxis={'categoryorder':'total ascending'})
fig.show()

![Top 5 Avg Score Majors in UB 2019](/photos/Top%205%20Avg%20Score%20Majors%20in%20UB%202019.png)

In [34]:
df_univ_braw_dokter = df_univ_braw_clear.loc[df_univ_braw_clear['first_major']=='PENDIDIKAN DOKTER'].sort_values(by=['average_score','specialized_score','general_score'],ascending=False).head(40)
df_univ_braw_dokter.head()

Unnamed: 0,general_score,specialized_score,average_score,first_major
652,657.25,810.25,733.75,PENDIDIKAN DOKTER
653,714.0,728.0,721.0,PENDIDIKAN DOKTER
654,657.75,782.25,720.0,PENDIDIKAN DOKTER
655,726.25,709.0,717.625,PENDIDIKAN DOKTER
656,677.0,757.75,717.375,PENDIDIKAN DOKTER


In [35]:


fig = make_subplots(rows=1, cols=2, shared_yaxes=True,subplot_titles=('Highest Scores','Lowest Scores'))

fig.add_trace(
    go.Bar(y=df_univ_braw_dokter.loc[:,'average_score',].iloc[:5],name='Highest Score',text=df_univ_braw_dokter.loc[:,'average_score'].iloc[:5]),
    row=1, col=1
)

fig.add_trace(
    go.Bar(y=df_univ_braw_dokter.loc[:,'average_score'].iloc[-5:],name='Lowest Score',text=df_univ_braw_dokter.loc[:,'average_score'].iloc[-5:]),
    row=1, col=2,
)
fig.update_layout(title_text='Medical School UB Avg Scores 2019',showlegend=False,)
# fig.update_xaxes(tickangle=0, tickfont=dict(family='Rockwell', color='black', size=14))

![Medical School UB Avg Scores 2019](/photos/Medical%20School%20UB%20Avg%20Scores%202019.png)