# Campus Placement Analysis

### Setup

In [43]:
import pandas as pd
import numpy as np

import plotly
import plotly.graph_objs as go
from plotly.subplots import make_subplots
plotly.offline.init_notebook_mode(connected=True)

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the full placement dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='Placement_Data_Full_Class.csv')

### The schema is as follows:

- ***sl_no***: Serial Number
- ***gender***: Gender- Male='M',Female='F'
- ***ssc_p***: Secondary Education percentage- 10th Grade
- ***ssc_b***: Board of Education- Central/ Others
- ***hsc_p***: Higher Secondary Education percentage- 12th Grade
- ***hsc_b***: Board of Education- Central/ Others
- ***hsc_s***: Specialization in Higher Secondary Education
- ***degree_p***: Degree Percentage
- ***degree_t***: Under Graduation(Degree type)- Field of degree education
- ***workex***: Work Experience
- ***etest_p***: Employability test percentage (conducted by college)
- ***specialisation***: Post Graduation(MBA)- Specialization
- ***mba_p***: MBA percentage
- ***status***: Status of placement- Placed/Not placed
- ***salary***: Salary offered by corporate to candidates

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; as the cell's last expression, the resulting table is displayed.
df.describe()

Unnamed: 0,sl_no,ssc_p,hsc_p,degree_p,etest_p,mba_p,salary
count,215.0,215.0,215.0,215.0,215.0,215.0,148.0
mean,108.0,67.303395,66.333163,66.370186,72.100558,62.278186,288655.405405
std,62.209324,10.827205,10.897509,7.358743,13.275956,5.833385,93457.45242
min,1.0,40.89,37.0,50.0,50.0,51.21,200000.0
25%,54.5,60.6,60.9,61.0,60.0,57.945,240000.0
50%,108.0,67.0,65.0,66.0,71.0,62.0,265000.0
75%,161.5,75.7,73.0,72.0,83.5,66.255,300000.0
max,215.0,89.4,97.7,91.0,98.0,77.89,940000.0


### Question 1: Are higher grades helpful in finding a job?

In [9]:
# Mean percentage at each academic stage, split by placement status.
# The grade columns are selected *before* aggregating: `df` also contains
# text columns (gender, boards, specialisation, ...), and since pandas 2.0
# calling .mean() on those raises a TypeError because numeric_only now
# defaults to False. Selecting first also avoids averaging unused columns.
# Result: one row per status with columns status, hsc_p, degree_p, mba_p.
df_grade = (
    df.groupby('status')[['hsc_p', 'degree_p', 'mba_p']]
    .mean()
    .reset_index()
)

In [22]:
# Grouped bar chart: mean grade at each academic stage, one bar series per
# placement status.
stages = ['High School', 'Bachelor', 'MBA']

# df_grade holds one row per status; dropping the label column leaves just the
# three stage means, in the same order as `stages`.
not_placed_means = df_grade.loc[df_grade['status'] == 'Not Placed'].drop(columns='status')
placed_means = df_grade.loc[df_grade['status'] == 'Placed'].drop(columns='status')

trace1 = go.Bar(x=stages, y=not_placed_means.values[0], name='Not Placed')
trace2 = go.Bar(x=stages, y=placed_means.values[0], name='Placed')
data = [trace1, trace2]

layout = go.Layout(
    title='Student Grades and Campus Placement',
    xaxis={'title': 'Stage'},
    yaxis={'title': 'Grade'},
)

fig = go.Figure(data=data, layout=layout)
fig.show()

**Answer**: We can see that those placed have slightly ***higher*** grades than those not placed. Therefore, grades could be said to be important.

### Question 2: Which MBA specializations are more employable and offer higher salaries?

In [26]:
# Encode the placement outcome as 1 ('Placed') / 0 (anything else) so that its
# per-group mean is the placement ratio. Vectorised comparison instead of a
# row-by-row apply; the resulting values are identical.
df['status_num'] = df['status'].eq('Placed').astype(int)

# Mean salary and placement ratio per MBA specialisation.
# The two columns are selected *before* aggregating: since pandas 2.0, calling
# .mean() on the whole frame raises a TypeError on the text columns
# (numeric_only now defaults to False).
# Note: mean() skips missing values, and salary is missing for some rows
# (148 non-null of 215 in the describe() output), so the salary figure is the
# average over the rows that have a salary -- presumably the placed students.
df_spec = df.groupby('specialisation')[['salary', 'status_num']].mean()

In [49]:
# Dual-axis chart per specialisation: mean salary as bars on the primary
# (left) y-axis, placement ratio as a scatter trace on the secondary (right)
# y-axis.
# NOTE(review): the display labels are hard-coded and assumed to line up with
# the row order of df_spec -- confirm against df_spec.index.
spec_labels = ['Marketing and Finance', 'Marketing and HR']

fig = make_subplots(specs=[[{"secondary_y": True}]])

trace1 = go.Bar(x=spec_labels, y=df_spec['salary'], name='Salary')
trace2 = go.Scatter(x=spec_labels, y=df_spec['status_num'], name='Placement ratio')

fig.add_trace(trace1, secondary_y=False)
fig.add_trace(trace2, secondary_y=True)

fig.update_layout(
    title_text="Salary and Placement versus Specialisation",
    showlegend=False,
)

# Axis titles: the shared x-axis, then one title per y-axis.
fig.update_xaxes(title_text="Specialisation")
fig.update_yaxes(title_text="Salary", secondary_y=False)
fig.update_yaxes(title_text="Placement", secondary_y=True)
fig.show()

**Answer**: From the graph above, it can be seen that more Finance students get jobs and that they have a slightly higher salary than HR students.

### Question 3: Are employability tests reliable?

In [56]:
# Mean employability-test percentage per placement status.
# etest_p is selected *before* aggregating: since pandas 2.0, calling .mean()
# on the whole grouped frame raises a TypeError on the text columns
# (numeric_only now defaults to False).
# Result: one row per status with columns status, etest_p.
df_empl = df.groupby('status')['etest_p'].mean().reset_index()

In [59]:
# Bar chart: mean employability-test score for each placement status.
# This single trace holds the bars for *every* status in df_empl, so it is
# named after the measure it plots. The previous name, 'Not Placed', was a
# copy-paste leftover that mislabelled the whole series wherever plotly shows
# the trace name.
trace1 = go.Bar(
    x = df_empl['status'],
    y = df_empl['etest_p'],
    name = 'Mean employability test score'
)

data = [trace1]

layout = go.Layout(
    yaxis = dict(title = 'Test Result'),
    xaxis = dict(title = 'Placement Status'),
    title = 'Employability Test and Campus Placement')

fig = go.Figure(data=data, layout=layout)
fig.show()

In [61]:
# Scatter plot of employability-test score (y) against offered salary (x),
# one marker per row of df.
trace1 = go.Scatter(x=df['salary'], y=df['etest_p'], mode='markers')
data = [trace1]

layout = go.Layout(
    title='Employability Tests and Salary',
    xaxis={'title': 'Salary'},
    yaxis={'title': 'Employability Test'},
)

fig = go.Figure(data=data, layout=layout)
fig.show()

**Answer**: From the graphs above, no clear relationship is visible between employability test scores and either placement or salary, so these plots give no evidence that the test predicts either outcome.