In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the raw survey responses and summarise the columns and dtypes.
SURVEY_CSV = '../input/kaggle-survey-2021/kaggle_survey_2021_responses.csv'
survey = pd.read_csv(SURVEY_CSV)

survey.info()

In [None]:
# Raw survey column -> short name used by the rest of the notebook.
# ('Experiance' keeps its original spelling because later cells reference it.)
SUBSET_COLUMNS = {
    'Time from Start to Finish (seconds)': 'Duration',
    'Q1': 'Age',
    'Q2': 'Gender',
    'Q3': 'Country',
    'Q4': 'Education',
    'Q5': 'Job_Title',
    'Q6': 'Experiance',
    'Q20': 'Industry',
    'Q21': 'company_size',
    'Q22': 'DS_team_size',
    'Q25': 'Compensation',
}

# Renaming by mapping (instead of overwriting .columns positionally) cannot
# silently mislabel a column if one of the source columns is missing.
survey_subset = survey[list(SUBSET_COLUMNS)].rename(columns=SUBSET_COLUMNS)

# Row 0 is dropped before the integer cast of Duration.
# .copy() makes survey_sub an independent frame, so the column assignment
# below (and in later cells) does not write into a slice of survey_subset.
survey_sub = survey_subset.loc[1:, :].copy()
survey_sub['Duration'] = survey_sub['Duration'].astype('int64')
survey_sub.head()

In [None]:
# Count how many respondents ticked each Q7 option (one column per option).
q7_counts = {}
for i in range(1, 13):
    # Row 0 is excluded before counting (it is also dropped in the Q8 and
    # survey_sub cells). This removes the need to take "every other row"
    # afterwards, which only worked when each column had exactly two values.
    answers = survey[f"Q7_Part_{i}"].iloc[1:]
    q7_counts.update(answers.value_counts().to_dict())

# One row per option, in column order Part_1 .. Part_12.
Q7 = pd.DataFrame(list(q7_counts.items()), columns=['Prog_language', 'counts'])

In [None]:
# Tally the Q8 answers; row 0 is skipped via .loc[1:].
# The two output columns are named explicitly because the names produced by a
# bare value_counts().reset_index() differ between pandas 1.x and 2.x, while the
# plotting cell further down expects exactly 'index' and 'Recommended_Prog_language'.
Q8 = (
    survey.loc[1:, 'Q8']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Recommended_Prog_language')
    .sort_values(by='index')
)


# Age Distribution of Survey Participants

In [None]:
# Percentage of respondents per age bracket, ordered by bracket label.
# Column names are set explicitly so the cell works on pandas 1.x and 2.x.
f1 = (
    survey_sub['Age']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Age')
    .sort_values(by='index')
)
Y = (f1['Age'] / f1['Age'].sum()) * 100
mylabel = f1['index']

# Pull out the first three wedges; pad/trim to the actual number of brackets
# so plt.pie never receives an explode sequence of the wrong length.
myexplode = ([0.25, 0.1, 0.2] + [0.0] * len(f1))[:len(f1)]

plt.style.use("fivethirtyeight")
# A list (not a set) so each wedge gets the same colour on every run.
colors = ['#9b2226', '#ae2012', '#bb3e03', '#ca6702', '#ee9b00', '#e9d8a6', '#94d2bd',
          '#0a9396', '#005f73', '#001219', '#99d98c']
plt.pie(Y, labels=mylabel, autopct="%1.1f%%", startangle=15, explode=myexplode,
        shadow=True, colors=colors)
plt.axis("equal")
plt.title("Age distribution on kaggle in 2021")
plt.legend(mylabel)
plt.gcf().set_size_inches(15, 8)
plt.show()

# Gender Distribution

In [None]:
# Percentage of respondents per gender, smallest group first.
# Column names are set explicitly so the cell works on pandas 1.x and 2.x.
f2 = (
    survey_sub['Gender']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Gender')
    .sort_values(by='Gender')
)
x1 = f2['index']
y1 = (f2['Gender'] / f2['Gender'].sum()) * 100

plt.figure(figsize=(15, 10))
ax = plt.axes()
ax.set_facecolor("white")
plt.bar(x1, y1, color='#42B300')

# Descriptive axis labels instead of the placeholder $X$ / $Y$.
plt.xlabel('Gender')
plt.ylabel('% of respondents')
plt.title('$Gender $ $ Distribution$')

# Write the percentage just above each bar.
for x, y in zip(x1, y1):
    plt.text(x, y + 0.05, '%.2f' % y, ha='center', va='bottom', fontsize=10)

plt.show()

# Contribution of Each Country in the Survey

In [None]:
# Number of respondents per country, smallest first.
# Column names are set explicitly so the cell works on pandas 1.x and 2.x.
f3 = (
    survey_sub['Country']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Country')
    .sort_values(by='Country')
)
x = f3['index']
y = f3['Country']
plt.style.use("fivethirtyeight")
plots = sns.barplot(data=f3, y=x, x=y, ci=None)
plt.ylabel("Countries")
# The bars are raw counts, not percentages, so label the axis accordingly.
plt.xlabel("Number of respondents")
plt.title("Nationalities on kaggle in 2021")
plt.gcf().set_size_inches(12, 14)
plt.show()

# Education Distribution

In [None]:
# Number of respondents per education level, smallest first.
# Column names are set explicitly so the cell works on pandas 1.x and 2.x.
f3 = (
    survey_sub['Education']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Education')
    .sort_values(by='Education')
)
x = f3['index']
y = f3['Education']
plt.style.use("fivethirtyeight")
plots = sns.barplot(data=f3, y=x, x=y, ci=None)
plt.ylabel("Education Level")
# The bars are raw counts, not percentages, so label the axis accordingly.
plt.xlabel("Number of respondents")
plt.title("Education on kaggle in 2021")
plt.gcf().set_size_inches(18, 14)
plt.show()

# Participating Job Titles

In [None]:
# Treemap sized by the number of respondents holding each job title.
job_title_counts = survey_sub.groupby(['Job_Title'])['Duration'].count()
visual_2_grp = job_title_counts.reset_index()
visual_2 = px.treemap(
    data_frame=visual_2_grp,
    path=['Job_Title'],
    names='Job_Title',
    values='Duration',
    title='Participating Job Titles',
)
visual_2.show()
del visual_2_grp

# Experience Distribution

In [None]:
# Percentage of respondents per coding-experience bracket, ordered by label.
# Column names are set explicitly so the cell works on pandas 1.x and 2.x.
f4 = (
    survey_sub['Experiance']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Experiance')
    .sort_values(by='index')
)
y = (f4['Experiance'] / f4['Experiance'].sum()) * 100

plt.figure(figsize=(22, 22))
# Newer matplotlib ships this style as 'seaborn-v0_8-deep'; fall back to the
# old name where only that one is available.
deep_style = 'seaborn-v0_8-deep' if 'seaborn-v0_8-deep' in plt.style.available else 'seaborn-deep'
plt.style.use(deep_style)

# Emphasise the 1st and 6th wedges; pad/trim to the real number of brackets.
myexplodes = ([0.2, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0] + [0.0] * len(f4))[:len(f4)]
color = ['#7CB342', '#C0CA33', '#FFB300', '#F57C00']
mylabels = f4['index']

# shadow is a boolean flag (the original passed the string 'true').
y.plot(kind='pie', labels=mylabels, autopct='%1.0f%%', startangle=15,
       explode=myexplodes, colors=color, title='Experience Distribution', shadow=True)
plt.legend(loc="upper left")
plt.show()

# Global Participation Based on Gender

In [None]:
# One row per (Country, Gender) pair. Only 'Duration' is summed: summing every
# column would also try to aggregate the text columns, which is wasted work and,
# on recent pandas versions, concatenates their strings.
visual_3_grp = (
    survey_sub.groupby(['Country', 'Gender'])['Duration']
    .sum()
    .reset_index()
)

# Animated world map: one frame per gender, colouring the countries that have
# at least one respondent of that gender.
visual_3 = px.choropleth(
    visual_3_grp,
    locations="Country",
    color='Gender',
    locationmode='country names',
    animation_frame='Gender',
    title='Participation based on the Countries & Gender',
)
visual_3.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
visual_3.show()
del visual_3_grp

# Gender Age Distribution

In [None]:
blue_colors = ['#0099C6','#1CFFCE','rgb(179,226,205)','#E6E6E6']

# Age x Gender contingency table with Age as a regular column.
df_q1_q2 = pd.crosstab(survey_sub['Age'], survey_sub['Gender']).reset_index()

# (gender column, bar colour, drawn above the axis?)
# Men are plotted upwards; every other group is negated so it hangs below.
gender_traces = [
    ('Man', 'rgb(0, 134, 149)', True),
    ('Woman', 'orange', False),
    ('Nonbinary', 'rgb(255,64,64)', False),
    ('Prefer not to say', 'rgb(138,54,15)', False),
    ('Prefer to self-describe', 'rgb(102,102,102)', False),
]

fig = go.Figure()
for gender, bar_color, above_axis in gender_traces:
    counts = df_q1_q2[gender]
    fig.add_trace(
        go.Bar(
            x=df_q1_q2['Age'],
            y=counts if above_axis else -counts,
            name=gender,
            marker_color=bar_color,
            text=counts,  # labels always show the positive count
            textposition="outside",
        )
    )

fig.add_annotation(
    x=5.5, y=3598, xref="x", yref="y",
    text="Male participants form more than half of the survey",
    showarrow=False, yshift=10, bgcolor="#ffffff", opacity=0.8,
)

# Dotted horizontal line (y is in paper coordinates).
fig.add_shape(
    type='line',
    x0=-0.5, y0=0.31, x1=11, y1=0.31,
    line=dict(color='black', dash='dot'),
    xref='x', yref='paper',
)

fig.update_layout(
    barmode='relative',
    title_text='Gender Ratio based on age groups',
    height=500,
    title_x=0.5,
    yaxis_title=" ",
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
)

fig.update_xaxes(visible=True, categoryorder='total descending')
# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_yaxes(visible=False, range=[-2000,4500])

# Men and women are the dominant genders in every age group,
# so here is their age distribution

In [None]:
# Lower-case label for the two largest gender groups; everyone else stays NaN.
survey_sub['clear_gender'] = survey_sub['Gender'].map({'Man': 'man', 'Woman': 'woman'})

# Keep only respondents that received a label, then cross-tabulate Age x gender.
has_clear_gender = survey_sub['clear_gender'].notna()
gen_age_com = survey_sub.loc[has_clear_gender, ['Age', 'clear_gender']]
CrosstabResult = pd.crosstab(
    index=gen_age_com['Age'],
    columns=gen_age_com['clear_gender'],
)
print(CrosstabResult)
CrosstabResult.plot.bar(figsize=(15,10), rot=0)


# Gender Job Roles Distribution

In [None]:
# Restrict to the eight job titles this section compares.
VI_JobTit = survey_sub[survey_sub['Job_Title'].isin([
    'Student', 'Data Scientist', 'Software Engineer', 'Data Analyst', 'Business Analyst',
    'Data Engineer', 'Statistician', 'Machine Learning Engineer',
])]

# Grouped bar chart: respondents per job title, split by the 'clear_gender'
# column created in the previous cell.
plt.figure(figsize=(15, 12))
ax = sns.countplot(
    data=VI_JobTit,
    x='Job_Title',
    hue='clear_gender',
    palette='pastel',
)

# Print each bar's height at its top (white text on the dark background).
for bar in ax.patches:
    ax.annotate(
        '{}'.format(bar.get_height()),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha='center',
        color='white',
    )

ax.set(xlabel='Job Roles',
       title='Data Science Survey takers Gender Dist',
       facecolor='#2c2c2c')
plt.yticks([])
# Rotation must be numeric; the string '60' is rejected by matplotlib
# versions that validate this argument.
plt.xticks(rotation=60)
plt.show()

# Gender educational Level distribution

In [None]:
# Horizontal count plot of education level, split into Man / Woman.
plapla = survey_sub[survey_sub['Gender'].isin(['Man','Woman'])]

plt.figure(figsize=(20, 12))
ax = sns.countplot(data=plapla, y='Education', hue='Gender', palette='rainbow')
ax.set(
    ylabel='Education Level',
    title='Data Science Survey takers Gender Dist',
    facecolor='White',
)
plt.yticks(rotation=0)
plt.xticks([])  # hide the count axis ticks
plt.show()

# Profession by Gender Distribution

In [None]:
def _profession_counts(respondents, gender_label):
    """Count job titles in `respondents` (most common first) and tag the rows with `gender_label`."""
    counts = (
        respondents['Job_Title']
        .value_counts()
        .rename_axis('Profession')
        .reset_index(name='Counts')
        .sort_values(by=['Counts'], ascending=False)
    )
    counts['Gender'] = gender_label
    return counts


men_mask = survey_sub['Gender'] == 'Man'
women_mask = survey_sub['Gender'] == 'Woman'

df_profession_male = _profession_counts(survey_sub[men_mask], 'Man')
df_profession_female = _profession_counts(survey_sub[women_mask], 'Woman')
# Everyone who is neither 'Man' nor 'Woman' is grouped as 'Others'.
df_profession_other = _profession_counts(survey_sub[~(men_mask | women_mask)], 'Others')

df_gen_prof = pd.concat(
    [df_profession_male, df_profession_female, df_profession_other],
    axis=0,
)

fig1 = px.funnel(
    df_gen_prof,
    x='Counts',
    y='Profession',
    color='Gender',
    height=500,
    title='Profession by gender',
    category_orders={'Gender': ['Man', 'Woman', 'Others']},
    color_discrete_sequence=['teal', 'orange', 'lightblue'],
)
fig1.update_traces(textposition='inside')
fig1.update_layout(
    autosize=True,
    margin=dict(t=110, b=50, l=70, r=40),
    title_x=0.5,
    title_y=0.92,
    plot_bgcolor='white',
    paper_bgcolor='white',
    title_font=dict(size=21, color='#222A2A', family="Muli, sans-serif"),
    font=dict(color='#222A2A'),
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
)
# Kept as a separate call with a Margin object, exactly as before; this is the
# cell's last expression, so the returned figure is what the notebook renders.
fig1.update_layout(margin=go.layout.Margin(l=0, r=0))

# Country vs. Gender and Experience

In [None]:
# Countries compared in this and the following sections.
TOP_COUNTRIES = ['India', 'United States of America', 'Other', 'Japan',
                 'China', 'Brazil', 'Russia', 'Nigeria']
# Fixed column order for the experience brackets (same order as before).
EXPERIENCE_LEVELS = ['1-3 years', '10-20 years', '20+ years', '3-5 years',
                     '5-10 years', '< 1 years', 'I have never written code']
GENDER_GROUPS = ['Men', 'Women', 'Other Genders']

# .copy() so later cells can add columns without writing into a slice of survey_sub.
VI_countries = survey_sub[survey_sub['Country'].isin(TOP_COUNTRIES)].copy()


def _experience_by_country(respondents):
    """Return a Country x experience-bracket table of respondent counts.

    Columns are forced to EXPERIENCE_LEVELS so every table has the same shape
    even when a bracket has no respondents in `respondents`.
    """
    return (
        pd.crosstab(respondents['Country'], respondents['Experiance'])
        .reindex(columns=EXPERIENCE_LEVELS, fill_value=0)
    )


is_man = VI_countries['Gender'] == 'Man'
is_woman = VI_countries['Gender'] == 'Woman'
dfb = _experience_by_country(VI_countries[is_man])
dfg = _experience_by_country(VI_countries[is_woman])
dfo = _experience_by_country(VI_countries[~is_man & ~is_woman])

# Combine side by side, aligned on the Country index. Aligning by country
# (rather than by row position after sorting each table separately) guarantees
# that every row holds the figures of a single country for all three groups.
df_gender_wise = (
    pd.concat([dfb, dfg, dfo], axis=1, keys=GENDER_GROUPS)
    .fillna(0)          # a country absent from one group has zero respondents there
    .astype(int)
)

# Order rows by the number of male respondents, largest first.
row_order = df_gender_wise['Men'].sum(axis=1).sort_values(ascending=False).index
df_gender_wise = df_gender_wise.loc[row_order]

cm1 = sns.light_palette("teal", as_cmap=True)
cm2 = sns.light_palette("orange", as_cmap=True)
cm3 = sns.light_palette("lightblue", as_cmap=True)

# One colour gradient per gender group.
styled = df_gender_wise.style
for group, cmap in zip(GENDER_GROUPS, (cm1, cm2, cm3)):
    styled = styled.background_gradient(
        cmap=cmap,
        subset=[(group, level) for level in EXPERIENCE_LEVELS],
    )

# Last expression of the cell: the Styler is what the notebook renders.
styled.set_caption("Genders and their years of experience in the countries with the most participants")\
    .format(precision=2).set_properties(**{
        'width': '20px',
        'max-width': '20px',
        'font-size': '8pt'
    })

# Age vs. Education

In [None]:
def _row_share(row):
    """Scale one row so that its entries sum to 1."""
    return row / row.sum()


# Age x Education respondent counts; empty combinations become 0.
z = (
    survey_sub.groupby(['Age','Education'])
    .size()
    .unstack()
    .fillna(0)
    .astype('int16')
)

fig, ax = plt.subplots(figsize=(14, 12))
heatmap_options = dict(
    xticklabels=True, yticklabels=True, cmap='YlOrBr', annot=True,
    linewidths=0.005, linecolor='black', annot_kws={"fontsize":12},
    fmt='.4f', cbar=False,
)
# Each cell shows the fraction of that age group holding the education level.
sns.heatmap(z.apply(_row_share, axis=1), **heatmap_options)
plt.title('Planned/Current Education Distribution by Age', fontname = 'monospace', weight='bold')

# Replace the last x tick label with a shorter one.
labels = [tick.get_text() for tick in ax.get_xticklabels()]
labels[-1] = 'Some College/Uni Study'
ax.set_xticklabels(labels)

plt.xticks(fontsize=12, rotation=60)
plt.yticks(fontsize=12)
plt.xlabel("Education", fontname = 'monospace', weight='semibold')
plt.ylabel("Age", fontname = 'monospace', weight='semibold')
plt.show()
del z

# Education vs. Countries

In [None]:
def _row_share(row):
    """Scale one row so that its entries sum to 1."""
    return row / row.sum()


# Country x Education respondent counts; empty combinations become 0.
z = (
    VI_countries.groupby(['Country','Education'])
    .size()
    .unstack()
    .fillna(0)
    .astype('int16')
)

fig, ax = plt.subplots(figsize=(14, 14))
heatmap_options = dict(
    xticklabels=True, yticklabels=True, cmap='YlOrBr', annot=True,
    linewidths=0.005, linecolor='black', annot_kws={"fontsize":12},
    fmt='.4f', cbar=False,
)
# Each cell shows the fraction of that country's respondents at the education level.
sns.heatmap(z.apply(_row_share, axis=1), **heatmap_options)
plt.title('Planned/Current Education Distribution by Country', fontname = 'monospace', weight='bold')

# Replace the last x tick label with a shorter one.
labels = [tick.get_text() for tick in ax.get_xticklabels()]
labels[-1] = 'Some College/Uni Study'
ax.set_xticklabels(labels)

plt.xticks(fontsize=12,rotation=45)
plt.yticks(fontsize=9)
plt.xlabel("Education", fontname = 'monospace', weight='semibold')
plt.ylabel("Country", fontname = 'monospace', weight='semibold')
plt.show()
del z

# Country-Age-Education Distribution for Countries With High Number of Participants

In [None]:
# Add the helper columns and the shortened education label without mutating a
# slice or using a chained inplace replace. edu_cnt / ctr_cnt are still created
# so the columns remain available, but they are no longer used to size the tiles.
VI_countries = VI_countries.assign(
    edu_cnt=VI_countries.groupby('Education')['Education'].transform('count'),
    ctr_cnt=VI_countries.groupby('Country')['Country'].transform('count'),
    Education=VI_countries['Education'].replace(
        {'Some college/university study without earning a bachelor’s degree': 'Some Uni Degrees'}
    ),
)

# One row per (Country, Age, Education) with the number of respondents in it.
# Sizing tiles by this count makes "percent parent" a true share of respondents;
# sizing by edu_cnt added the global education total once per respondent row.
edu_tree = (
    VI_countries.groupby(['Country', 'Age', 'Education'])
    .size()
    .reset_index(name='respondents')
)

fig = px.treemap(
    edu_tree,
    path=[px.Constant("Countries"), 'Country', 'Age', 'Education'],
    values='respondents',
    color='Country',
    title="Country-Age-Education Distribution for Countries With High Number of Participants",
)
fig.update_traces(textinfo='label+percent parent')
fig.update_layout(margin=dict(t=50, l=0, r=0, b=0))
fig.show()

# Job Title vs. Countries

In [None]:
# Helper columns are still created (without writing into a slice) so they remain
# available, but they are no longer used to size the tiles.
VI_countries = VI_countries.assign(
    job_cnt=VI_countries.groupby('Job_Title')['Job_Title'].transform('count'),
    ctr_cnt=VI_countries.groupby('Country')['Country'].transform('count'),
)

# One row per (Country, Job_Title) with the number of respondents in it.
# Sizing tiles by this count makes "percent parent" a true share of respondents;
# sizing by job_cnt added the global job-title total once per respondent row.
job_tree = (
    VI_countries.groupby(['Country', 'Job_Title'])
    .size()
    .reset_index(name='respondents')
)

fig = px.treemap(
    job_tree,
    path=[px.Constant("Countries"), 'Country', 'Job_Title'],
    values='respondents',
    color='Country',
    title="Country-JobRoles Distribution for Countries With High Number of Participants",
)
fig.update_traces(textinfo='label+percent parent')
fig.update_layout(margin=dict(t=50, l=0, r=0, b=0))
fig.show()

# Education vs. Job_Title

In [None]:
# Education levels to compare, written with plain ASCII apostrophes.
MAIN_EDUCATION_LEVELS = [
    "Master's degree",
    "Bachelor's degree",
    "Doctoral degree",
    "Some college/university study without earning a bachelor's degree",
]

# Normalise typographic apostrophes before matching so the filter works
# whichever apostrophe character the data uses.
education_ascii = VI_JobTit['Education'].str.replace('’', "'", regex=False)
VI_educ = VI_JobTit[education_ascii.isin(MAIN_EDUCATION_LEVELS)]

# Plot the filtered frame (previously it was computed but never used).
plt.figure(figsize=(25, 10))
ax = sns.countplot(data=VI_educ, x='Job_Title', hue='Education', palette='plasma')

# Print each bar's height at its top.
for bar in ax.patches:
    ax.annotate(
        '{}'.format(bar.get_height()),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha='center',
        color='black',
    )

ax.set(xlabel='Job_Title',
       title='Eductional Level of each Job Title',
       facecolor='white')
plt.yticks([])
# Rotation must be numeric; the string '20' is rejected by matplotlib
# versions that validate this argument.
plt.xticks(rotation=20)
plt.show()

# Industries of the Kagglers Contributing in this Survey

In [None]:
plt.figure(figsize=(15, 15))
# Newer matplotlib ships this style as 'seaborn-v0_8-dark'; fall back to the
# old name where only that one is available.
dark_style = 'seaborn-v0_8-dark' if 'seaborn-v0_8-dark' in plt.style.available else 'seaborn-dark'
plt.style.use(dark_style)

# Respondents per industry, most common first.
x = survey_sub['Industry'].value_counts()

# Pull out the three largest wedges; pad/trim to the actual number of
# industries so plt.pie never gets an explode sequence of the wrong length.
myexplodes = tuple(([0.22, 0.15, 0.1] + [0.0] * len(x))[:len(x)])

# shadow is a boolean flag (the original passed the string 'true').
plt.pie(x, labels=x.index, startangle=20, explode=myexplodes,
        autopct='%1.1f%%', pctdistance=0.85, shadow=True)

# A white disc over the centre turns the pie into a donut.
white_circle = plt.Circle((0, 0), 0.5, color='white')
p = plt.gcf()
p.gca().add_artist(white_circle)
plt.show()

# Company Sizes

In [None]:
# Number of respondents per company-size bracket.
visual_grp_6 = survey_sub.groupby(['company_size'])['Duration'].count().reset_index()

visual_6 = px.bar(
    data_frame=visual_grp_6,
    y='company_size',
    x='Duration',
    title='Kagglers Company Size distribution',
    text='Duration',
    # The bar length is a count of survey respondents, not of companies.
    labels={'company_size': 'Company Size', 'Duration': 'No of Respondents'},
)
visual_6.update_traces(marker_color='rgb(150,202,25)')  # bar colour
visual_6.update_layout(yaxis={'categoryorder': 'total descending'})
visual_6.show()

# Data Science Team Sizes 

In [None]:
# Donut chart of data-science team sizes.
plt.figure(figsize=(10,10))
plt.style.use('ggplot')

x = survey_sub['DS_team_size'].value_counts()
pie_options = dict(
    labels=x.index,
    startangle=20,
    autopct='%1.1f%%',
    pctdistance=0.85,
    shadow='true',
)
plt.pie(x, **pie_options)

# A white disc over the centre turns the pie into a donut.
white_circle = plt.Circle((0,0), 0.5, color='white')
p = plt.gcf()
p.gca().add_artist(white_circle)
plt.show()

# Company Size in Each Industry

In [None]:
# Both columns are categorical text, so a box plot has no numeric axis to
# summarise. A count heatmap shows how many respondents fall into each
# (company size, industry) combination instead.
df_Ind_Com = survey_sub[['Industry', 'company_size']].dropna()

# Smallest to largest company size.
# NOTE(review): labels assumed to match the survey's answer strings; any value
# not listed here is simply placed after the listed ones by plotly.
COMPANY_SIZE_ORDER = ['0-49 employees', '50-249 employees', '250-999 employees',
                      '1000-9,999 employees', '10,000 or more employees']

fig = px.density_heatmap(
    df_Ind_Com,
    x="company_size",
    y="Industry",
    category_orders={'company_size': COMPANY_SIZE_ORDER},
)
fig.update_layout(title_text="Company Size vs Industry Sector", title_x=0.5, title_y=0.93)

fig.show()

# Data Scientists Compensation

In [None]:
# Treemap sized by the number of respondents in each compensation bracket.
compensation_counts = survey_sub.groupby(['Compensation'])['Duration'].count()
visual_10_grp = compensation_counts.reset_index()
visual_10 = px.treemap(
    data_frame=visual_10_grp,
    path=['Compensation'],
    names='Compensation',
    values='Duration',
    title='Participants Compensation',
)
visual_10.show()
del visual_10_grp

# Compensation based on Country and Experience

In [None]:
# Collapse the seven coding-experience answers into three tiers.
EXPERIENCE_TIERS = {
    'I have never written code': 'Beginner',
    '< 1 years': 'Beginner',
    '1-3 years': 'Beginner',
    '3-5 years': 'Intermediate',
    '5-10 years': 'Intermediate',
    '10-20 years': 'Expert',
    '20+ years': 'Expert',
}
# Compensation brackets shown, one subplot column each (left to right).
SALARY_BANDS = ['$0-999', '1,000-1,999', '10,000-14,999', '30,000-39,999', '100,000-124,999']
# Countries compared and the bar colour used for each.
COUNTRY_COLORS = {'India': 'teal', 'United States of America': 'orange'}

df = survey_sub
# .copy() so the column assignment below does not write into a slice of survey_sub.
df_456 = df[['Country', 'Experiance', 'Compensation']].copy()
df_456['Experiance'] = df_456['Experiance'].replace(EXPERIENCE_TIERS)
df_456 = df_456[df_456['Country'].isin(list(COUNTRY_COLORS))]
df_456 = df_456[df_456['Compensation'].isin(SALARY_BANDS)]

# Rows: (Experiance, Country); columns: salary bands. normalize='index' makes
# each band's values sum to 1 across all (experience, country) pairs.
# reindex guarantees a column for every band even if one has no respondents.
df_456 = (
    pd.crosstab(df_456['Compensation'], [df_456['Experiance'], df_456['Country']], normalize='index')
    .round(2)
    .reindex(SALARY_BANDS, fill_value=0)
    .T
    .reset_index()
)

per_country = {
    country: df_456[df_456['Country'] == country]
    .sort_values(by=["Experiance"])
    .reset_index(drop=True)
    for country in COUNTRY_COLORS
}
df_456_India = per_country['India']
df_456_USA = per_country['United States of America']

fig = make_subplots(rows=1, cols=len(SALARY_BANDS), shared_yaxes=True,
                    horizontal_spacing=0, vertical_spacing=0)

# One horizontal bar trace per (salary band, country). Traces are named after
# the country and the legend entry is shown once per country (first column
# only) so the teal/orange colours are explained.
for col, band in enumerate(SALARY_BANDS, start=1):
    for country, bar_color in COUNTRY_COLORS.items():
        shares = per_country[country]
        fig.add_trace(
            go.Bar(
                y=shares['Experiance'],
                x=shares[band],
                marker_color=bar_color,
                name=country,
                legendgroup=country,
                showlegend=(col == 1),
                orientation='h',
                opacity=0.8,
            ),
            row=1, col=col,
        )

fig.update_xaxes(zeroline=False, showticklabels=False, ticks="")
fig.update_traces(hovertemplate=None, marker=dict(line=dict(width=0)))
fig.update_yaxes(tickmode='array', showline=False, showgrid=False,
                 tickvals=['Beginner', 'Intermediate', 'Expert'],
                 ticktext=['Beginner', 'Intermediate', 'Expert'])
fig.update_layout(
    height=550,
    title_text="Compensation based on Experiance and Country", title_x=0.5, title_y=0.96,
    template="plotly_white", barmode='stack',
    autosize=True,
    margin=dict(t=80, b=50, l=70, r=40),
    plot_bgcolor='white', paper_bgcolor='white',
    title_font=dict(size=21, color='#222A2A', family="Muli, sans-serif"),
    font=dict(color='#222A2A'),
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
)

# Label each subplot with its salary band (leading '$' dropped, as before).
for col, band in enumerate(SALARY_BANDS, start=1):
    fig.update_xaxes(title_text=band.lstrip('$'), title_font=dict(size=12), row=1, col=col)

fig.show()

# Preferred Programming Language Distribution

In [None]:
blue_colors = ['#19D3F3', '#17BECF', '#0099C6', '#1CFFCE', '#2ED9FF', '#00B5F7', '#0DF9FF', '#22FFA7', 
    'rgb(102, 197, 204)', 'rgb(56, 166, 165)', 'rgb(47, 138, 196)', '#2ED9FF']

# Left: bar chart of language counts. Right: the same counts as a donut.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "pie"}]])

language_bars = go.Bar(
    x=Q7['Prog_language'],
    y=Q7['counts'],
    text=Q7['counts'],
    showlegend=False,
    name="Programming language",
)
fig.add_trace(language_bars, row=1, col=1)
# Applied while the bar trace is the only trace in the figure.
fig.update_traces(
    textposition='outside',
    marker_color=blue_colors,
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)

# pull is given as a fraction of the pie radius; the first slice is pulled further.
slice_pull = [0.08] + [0.05] * 14
language_pie = go.Pie(
    values=Q7['counts'],
    labels=Q7['Prog_language'],
    pull=slice_pull,
    hole=.3,
    marker_colors=blue_colors,
    opacity=0.6,
    name="Programming language",
)
fig.add_trace(language_pie, row=1, col=2)

fig.update_layout(
    title_text='Preferred programming language by participants',
    title_x=0.5,
    title_y=0.95,
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
    height= 350,
)
fig.show()

# Recommended Programming Language distribution

In [None]:
blue_colors = ['#0099C6', '#19D3F3', '#17BECF', '#1CFFCE', '#2ED9FF', '#00B5F7', '#0DF9FF', '#22FFA7', 
    'rgb(102, 197, 204)', 'rgb(56, 166, 165)', 'rgb(47, 138, 196)', '#2ED9FF']

# Left: bars with a red line through the same points. Right: the counts as a donut.
fig1 = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "pie"}]])

languages = Q8['index']
recommendations = Q8['Recommended_Prog_language']

recommended_bars = go.Bar(
    x=languages,
    y=recommendations,
    text=recommendations,
    showlegend=False,
    name="Recommended Programming language",
)
fig1.add_trace(recommended_bars, row=1, col=1)
# Applied while the bar trace is the only trace in the figure.
fig1.update_traces(
    textposition='outside',
    marker_color=blue_colors,
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)

# Thin red line drawn over the bars.
trend_line = go.Scatter(
    x=languages,
    y=recommendations,
    showlegend=False,
    line=dict(color='red', width=1.5),
)
fig1.add_trace(trend_line, row=1, col=1)

# pull is given as a fraction of the pie radius.
slice_pull = [0.05] * 10
recommended_pie = go.Pie(
    values=recommendations,
    labels=languages,
    pull=slice_pull,
    hole=.3,
    marker_colors=blue_colors,
    opacity=0.6,
    name="Coding Recom",
)
fig1.add_trace(recommended_pie, row=1, col=2)

fig1.update_layout(
    title_text='Recommended programming language by participants',
    title_x=0.5,
    title_y=0.95,
    legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="center", x=0.5),
    height= 350,
)
fig1.show()