In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from wordcloud import WordCloud
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, plot_confusion_matrix
from datetime import datetime
import warnings

In [None]:
# Mount the user's Google Drive into the runtime at /content/gdrive.
# `google.colab` is supplied by the Colab environment, so this cell is Colab-specific.
# NOTE(review): the dataset is later read from '/content/collegePlace_new.csv',
# which is not under this mount point — confirm whether the mount is still needed.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Hide FutureWarning messages raised by library calls so cell output stays readable.
warnings.simplefilter("ignore", category=FutureWarning)

# Use seaborn's dark-grid look for seaborn/matplotlib figures.
sns.set(style="darkgrid")

In [None]:
# Read the placement dataset into `data`, the frame every later cell works from.
csv_path = '/content/collegePlace_new.csv'
data = pd.read_csv(csv_path)

# Preview the first rows.
data.head()

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,PlacedOrNot
0,22,Male,Electronics And Communication,1,8,1
1,21,Female,Computer Science,0,7,1
2,22,Female,Information Technology,1,6,1
3,21,Male,Information Technology,0,8,1
4,22,Male,Mechanical,0,8,1


In [None]:
# Report the (rows, columns) tuple of the loaded frame.
frame_shape = data.shape
print(f"Shape of Dataframe is: {frame_shape}")

Shape of Dataframe is: (2966, 6)


In [None]:
# One-column table listing the dtype of every column in `data`.
print('Datatype in Each Column\n')
data.dtypes.to_frame(name='Datatype').rename_axis("Column Name")

Datatype in Each Column



Unnamed: 0_level_0,Datatype
Column Name,Unnamed: 1_level_1
Age,int64
Gender,object
Stream,object
Internships,int64
CGPA,int64
PlacedOrNot,int64


In [None]:
# Summary statistics, transposed so each numeric column becomes a row.
summary_stats = data.describe().T

# Styling: in-cell bars for the mean, a red gradient for the standard deviation
# and a diverging gradient for the median (50%).
(
    summary_stats.style
    .bar(subset=['mean'], color='#205ff2')
    .background_gradient(subset=['std'], cmap='Reds')
    .background_gradient(subset=['50%'], cmap='coolwarm')
)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,2966.0,21.48584,1.324933,19.0,21.0,21.0,22.0,30.0
Internships,2966.0,0.703641,0.740197,0.0,0.0,1.0,1.0,3.0
CGPA,2966.0,7.073837,0.967748,5.0,6.0,7.0,8.0,9.0
PlacedOrNot,2966.0,0.552596,0.49731,0.0,0.0,1.0,1.0,1.0


In [None]:
# Number of missing values per column, shown as a one-column table.
null_counts = data.isnull().sum()
null_counts.to_frame(name="Null Values").rename_axis("Column Name")

Unnamed: 0_level_0,Null Values
Column Name,Unnamed: 1_level_1
Age,0
Gender,0
Stream,0
Internships,0
CGPA,0
PlacedOrNot,0


In [None]:
# Age distribution of all students; the dashed red line marks the mean age.
mean_age = data['Age'].mean()

fig = px.histogram(data, x='Age', title="<b>Average Age of Student</b>")
fig.add_vline(x=mean_age, line_dash="dash", line_color="red", line_width=2)
fig.show()

In [None]:
# Age distribution split by gender, with a violin plot in the margin.
fig = px.histogram(
    data,
    x='Age',
    color="Gender",
    marginal='violin',
    title="<b>Average Age Gender wise</b>",
)

# Semi-transparent markers so overlapping gender traces stay visible.
fig.update_traces(marker=dict(opacity=0.7))

# Dashed black line at the overall mean age; as the cell's last expression,
# the returned figure is what gets rendered.
overall_mean_age = data['Age'].mean()
fig.add_vline(x=overall_mean_age, line_dash="dash", line_color="black", line_width=2)

In [None]:
# Head-count per gender as a one-column table ("Counts"), indexed by "Gender".
# The Series is named explicitly rather than renaming a "Gender" column afterwards:
# from pandas 2.0 onwards value_counts() names its result "count" (and puts the
# original column name on the index), so the old {"Gender": "Counts"} column
# mapping silently matched nothing and the column stayed "count".
data['Gender'].value_counts().rename("Counts").rename_axis("Gender").to_frame()

Unnamed: 0_level_0,Counts
Gender,Unnamed: 1_level_1
Male,2475
Female,491


In [None]:
# Head-count per gender as a bar-style histogram, one colour per gender.
px.histogram(
    data,
    x="Gender",
    color="Gender",
    title="<b>Total Male and Female</b>",
)

In [None]:
# Donut chart (hole=0.5) of the gender split on the dark template.
fig = px.pie(
    data,
    names="Gender",
    hole=0.5,
    template="plotly_dark",
    title="<b>Counts in Gender</b>",
)

# Put percentage + label inside each slice and outline slices in black.
slice_outline = dict(color='#000000', width=1.5)
fig.update_traces(
    textinfo='percent+label',
    textposition='inside',
    marker=dict(line=slice_outline),
)

In [None]:
# Split the dataset into one frame per gender.
gender_column = data['Gender']
male = data[gender_column == "Male"]
female = data[gender_column == "Female"]

In [None]:
# Row counts of the per-gender frames.
total_male, total_female = len(male), len(female)

In [None]:
# Number of placed students (PlacedOrNot == 1) in each gender frame.
placed_male = male[male['PlacedOrNot'] == 1]
placed_female = female[female['PlacedOrNot'] == 1]

total_male_pass = len(placed_male)
total_female_pass = len(placed_female)

In [None]:
def _percent_of(part, whole):
    """Return `part` as a percentage of `whole`, rounded to two decimals."""
    return np.round((part * 100) / whole, 2)


# Share of placed students within each gender.
pass_male_percentage = _percent_of(total_male_pass, total_male)
pass_female_percentage = _percent_of(total_female_pass, total_female)

In [None]:
# Per-gender summary figures; every value is wrapped in a one-element list.
details = {
    "Total Male": [total_male],
    "Total Female": [total_female],
    "Total male pass": [total_male_pass],
    "Total female pass": [total_female_pass],
    "% of Passed Male": [pass_male_percentage],
    "% of Passed Female": [pass_female_percentage],
}

In [None]:
# Display the raw per-gender summary dict.
details

{'Total Male': [2475],
 'Total Female': [491],
 'Total male pass': [1364],
 'Total female pass': [275],
 '% of Passed Male': [55.11],
 '% of Passed Female': [56.01]}

In [None]:
# Single-row summary frame built from `details` (row label "Detail").
gender_wise = pd.DataFrame(details, index=["Detail"])

# Show it vertically, one metric per row. Transposing directly would merge the
# integer count columns and the float percentage columns into one float column,
# rendering head-counts as 2475.0 etc.; casting to object first keeps each value
# in its own type for display. `gender_wise` itself is left unchanged.
gender_wise.astype(object).T

Unnamed: 0,Detail
Total Male,2475.0
Total Female,491.0
Total male pass,1364.0
Total female pass,275.0
% of Passed Male,55.11
% of Passed Female,56.01


In [None]:
# Student count per stream, coloured by placement outcome, on the dark template.
fig = px.histogram(
    data_frame=data,
    x="Stream",
    color="PlacedOrNot",
    pattern_shape_sequence=['x'],
    template='plotly_dark',
    title="<b>Counts of Stream</b>",
)

# Centre and enlarge the title and set a minimum uniform text size; the figure
# returned by update_layout is the cell's rendered output.
fig.update_layout(
    title_x=0.5,
    title_font={"size": 20},
    uniformtext_minsize=15,
)

In [None]:
# Students whose CGPA is strictly above the dataset mean.
mean_cgpa = data['CGPA'].mean()
above_mean_mask = data['CGPA'] > mean_cgpa
cgpa_above_avg = data[above_mean_mask]

cgpa_above_avg

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,PlacedOrNot
0,22,Male,Electronics And Communication,1,8,1
3,21,Male,Information Technology,0,8,1
4,22,Male,Mechanical,0,8,1
11,22,Female,Electrical,1,8,1
13,21,Male,Computer Science,1,8,1
...,...,...,...,...,...,...
2951,21,Male,Computer Science,3,8,1
2952,23,Male,Mechanical,0,8,1
2954,23,Female,Computer Science,1,8,1
2956,22,Male,Computer Science,0,8,1


In [None]:
# CGPA distribution of the above-average group, coloured by placement outcome.
fig = px.histogram(
    data_frame=cgpa_above_avg,
    x='CGPA',
    color='PlacedOrNot',
    template='plotly',
    title="<b>Above Average CGPA Vs Placement</b>",
)
fig.update_layout(bargap=0.2)  # leave a gap between adjacent bars
fig.show()

In [None]:
# Students whose CGPA is strictly below the dataset mean.
below_mean_mask = data['CGPA'] < data['CGPA'].mean()
cgpa_below_avg = data[below_mean_mask]

In [None]:
# Display the below-average CGPA subset.
cgpa_below_avg

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,PlacedOrNot
1,21,Female,Computer Science,0,7,1
2,22,Female,Information Technology,1,6,1
5,22,Male,Electronics And Communication,0,6,0
6,21,Male,Computer Science,0,7,0
7,21,Male,Information Technology,1,7,0
...,...,...,...,...,...,...
2960,23,Male,Mechanical,1,7,0
2961,23,Male,Information Technology,0,7,0
2962,23,Male,Mechanical,1,7,0
2963,22,Male,Information Technology,1,7,0


In [None]:
# CGPA distribution of the below-average group; placed / not placed bars are
# drawn side by side (barmode='group') on the dark template.
fig = px.histogram(
    data_frame=cgpa_below_avg,
    x='CGPA',
    color='PlacedOrNot',
    barmode='group',
    template='plotly_dark',
    title="<b>Below Average CGPA Vs Placement</b>",
)
fig.update_layout(bargap=0.2)  # leave a gap between adjacent bars
fig.show()

In [None]:
# Per-stream summary: mean Age and CGPA, total Internships and total placed students.
stream_aggregations = {
    'Age': 'mean',
    'Internships': 'sum',
    "CGPA": 'mean',
    'PlacedOrNot': 'sum',
}
stream_wise = data.groupby('Stream').agg(stream_aggregations)

# Highlight the maximum of every column.
stream_wise.style.highlight_max()

Unnamed: 0_level_0,Age,Internships,CGPA,PlacedOrNot
Stream,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Civil,21.44164,173,7.094637,146
Computer Science,21.559278,676,7.039948,452
Electrical,21.299401,203,7.080838,181
Electronics And Communication,21.410377,306,7.125,251
Information Technology,21.539797,509,7.073806,409
Mechanical,21.518868,220,7.063679,200


In [None]:
# Grouped bars of every `stream_wise` column, one group per stream.
px.bar(
    data_frame=stream_wise,
    barmode='group',
    template="plotly_dark",
    title="<b>Stream wise Analyzing</b>",
)

In [None]:
# Students with zero internships.
zero_internships_mask = data['Internships'] == 0
no_internship = data[zero_internships_mask]

no_internship

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,PlacedOrNot
1,21,Female,Computer Science,0,7,1
3,21,Male,Information Technology,0,8,1
4,22,Male,Mechanical,0,8,1
5,22,Male,Electronics And Communication,0,6,0
6,21,Male,Computer Science,0,7,0
...,...,...,...,...,...,...
2956,22,Male,Computer Science,0,8,1
2958,23,Male,Computer Science,0,6,0
2959,23,Male,Information Technology,0,7,0
2961,23,Male,Information Technology,0,7,0


In [None]:
# Placement outcome counts among students without any internship.
fig = px.histogram(
    data_frame=no_internship,
    x="PlacedOrNot",
    color="PlacedOrNot",
    template='plotly_dark',
    title="<b>No Internship Experience Vs Placement</b>",
)
fig.update_layout(bargap=0.2)  # leave a gap between adjacent bars
fig.show()