In [286]:
import pandas as pd

In [287]:
# Do not truncate long text values when frames are rendered.
pd.set_option('display.max_colwidth', None)

# Load the schema CSV and the public survey results CSV from ./data.
df_schema = pd.read_csv('./data/survey_results_schema.csv')
df = pd.read_csv('./data/survey_results_public.csv')

In [288]:
class ResultDfMaker:
    """Build per-answer count and percentage tables for one column of a frame.

    Cells that hold several answers joined by ';' are split so that every
    individual answer is tallied on its own.
    """

    def __init__(self, df):
        """Store the frame to summarise.

        Args:
            df: DataFrame whose columns will be summarised. The methods of
                this class only read from it.
        """
        self.df = df
        # Most recent counts table built by get_value_df_by_column
        # (None until that method has been called).
        self.value_df = None

    def count_values_by_column(self, column):
        """Return ``self.df[column].value_counts()`` as a Series."""
        return self.df[column].value_counts()

    def create_df_from_series(self, series):
        """Turn ``series`` into a DataFrame by moving its index into a column."""
        return series.reset_index()

    def give_columns(self, dataframe, column_one, column_two):
        """Rename the two columns of ``dataframe`` in place; returns None."""
        dataframe.columns = (column_one, column_two)

    def fix_repeat(self, df, column):
        """Split ';'-joined answers in ``column`` and re-aggregate their counts.

        Args:
            df: frame containing ``column`` and a numeric ``'count'`` column.
            column: name of the column holding the answers.

        Returns:
            A new frame with one row per distinct answer and its summed
            ``'count'``, ordered by answer (the groupby default). The frame
            passed in as ``df`` is left unmodified.
        """
        def split_answers(value):
            # Only strings can carry several ';'-joined answers; any other
            # value is treated as a single answer.
            return value.split(';') if isinstance(value, str) else [value]

        answers = df[column]
        is_text = (pd.api.types.is_object_dtype(answers)
                   or pd.api.types.is_string_dtype(answers))
        if not is_text:
            # Numeric (or other non-text) columns have nothing to split, and
            # the .str accessor would raise on them, so just aggregate.
            return df.groupby(column)['count'].sum().reset_index()

        # Work on a copy so the caller's frame is not modified.
        exploded = df.copy()
        exploded[column] = answers.map(split_answers)
        exploded = exploded.explode(column)
        return exploded.groupby(column)['count'].sum().reset_index()

    def sort_values_by_column(self, df):
        """Return ``df`` sorted by its ``'count'`` column, largest first."""
        return df.sort_values(by='count', ascending=False)

    def get_value_df_by_column(self, column):
        """Count how often each individual answer occurs in ``column``.

        Args:
            column: name of a column of ``self.df``.

        Returns:
            DataFrame with columns ``[column, 'count']`` sorted by descending
            count. The same frame is also stored on ``self.value_df``.
        """
        series = self.count_values_by_column(column)
        result_df = self.create_df_from_series(series)
        self.give_columns(result_df, column, 'count')
        result_df = self.fix_repeat(result_df, column)
        result_df = self.sort_values_by_column(result_df)
        self.value_df = result_df
        return result_df

    def get_percentages_df_by_column(self, column):
        """Express each answer's count as a percentage of all counted answers.

        The denominator is the sum of the per-answer counts, so for columns
        with ';'-joined answers the values are shares of all mentions and
        add up to 100.

        Args:
            column: name of a column of ``self.df``.

        Returns:
            A new DataFrame with columns ``[column, 'count']`` where
            ``'count'`` holds the percentage. ``self.value_df`` keeps the raw
            counts computed along the way.
        """
        counts_df = self.get_value_df_by_column(column)
        total = counts_df['count'].sum()
        # assign() builds a new frame, so the counts cached on self.value_df
        # are not overwritten by the percentages.
        return counts_df.assign(count=(counts_df['count'] / total) * 100)
        


In [289]:
# Keep respondents from Iran who report a great deal of purchase influence.
high_influence_df = df.loc[
    df['Country'].eq('Iran, Islamic Republic of...')
    & df['PurchaseInfluence'].eq('I have a great deal of influence')
]

In [290]:
# Keep respondents from Iran who report some purchase influence.
some_influence_df = df.loc[
    df['Country'].eq('Iran, Islamic Republic of...')
    & df['PurchaseInfluence'].eq('I have some influence')
]


In [291]:
# One ResultDfMaker per influence cohort, so both can be summarised the same way.
high_result_maker, some_result_maker = (
    ResultDfMaker(cohort_df)
    for cohort_df in (high_influence_df, some_influence_df)
)

In [292]:
# Employment answer counts for the "great deal of influence" cohort
# (';'-joined answers are split and counted individually), largest first.
high_influence_employment_df = high_result_maker.get_value_df_by_column('Employment')
high_influence_employment_df

Unnamed: 0,Employment,count
0,"Employed, full-time",80
3,"Independent contractor, freelancer, or self-employed",40
1,"Employed, part-time",10
2,I prefer not to say,2


In [293]:
# Employment answer counts for the "some influence" cohort
# (';'-joined answers are split and counted individually), largest first.
some_influence_employment_df = some_result_maker.get_value_df_by_column('Employment')
some_influence_employment_df

Unnamed: 0,Employment,count
0,"Employed, full-time",150
3,"Independent contractor, freelancer, or self-employed",33
1,"Employed, part-time",10
2,I prefer not to say,1


In [294]:
# DevType answer counts for the "great deal of influence" cohort, largest first.
high_influence_devtype = high_result_maker.get_value_df_by_column('DevType')
high_influence_devtype

Unnamed: 0,DevType,count
11,"Developer, full-stack",42
7,"Developer, back-end",28
19,Project manager,5
10,"Developer, front-end",4
8,"Developer, desktop or enterprise applications",4
16,Engineering manager,3
18,Product manager,3
3,Data scientist or machine learning specialist,2
22,"Senior Executive (C-Suite, VP, etc.)",2
21,Security professional,2


In [295]:
# DevType answer counts for the "some influence" cohort, largest first.
some_influence_devtype = some_result_maker.get_value_df_by_column('DevType')
some_influence_devtype

Unnamed: 0,DevType,count
7,"Developer, back-end",43
11,"Developer, full-stack",42
10,"Developer, front-end",23
13,"Developer, mobile",16
8,"Developer, desktop or enterprise applications",14
3,Data scientist or machine learning specialist,7
9,"Developer, embedded applications or devices",7
17,Other (please specify):,2
4,DevOps specialist,2
20,Scientist,2


In [296]:
# YearsCodePro answer counts for the "great deal of influence" cohort.
# Rows are ordered by count, not by number of years.
high_influence_yearscodepro = high_result_maker.get_value_df_by_column('YearsCodePro')
high_influence_yearscodepro

Unnamed: 0,YearsCodePro,count
21,8,18
1,10,12
18,5,11
15,3,9
19,6,8
17,4,8
20,7,7
11,20,6
10,2,5
23,Less than 1 year,4


In [297]:
# YearsCodePro answer counts for the "some influence" cohort.
# Rows are ordered by count, not by number of years.
some_influence_yearscodepro = some_result_maker.get_value_df_by_column('YearsCodePro')
some_influence_yearscodepro

Unnamed: 0,YearsCodePro,count
17,5,32
14,3,19
16,4,17
18,6,16
19,7,15
9,2,12
20,8,12
1,10,10
3,12,8
2,11,5


In [298]:
# EdLevel answer counts for the "great deal of influence" cohort, largest first.
high_influence_edlevel = high_result_maker.get_value_df_by_column('EdLevel')
high_influence_edlevel

Unnamed: 0,EdLevel,count
1,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",52
2,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",42
5,Some college/university study without earning a degree,7
4,"Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",5
0,"Associate degree (A.A., A.S., etc.)",4
3,"Professional degree (JD, MD, Ph.D, Ed.D, etc.)",4
6,Something else,3


In [299]:
# EdLevel answer counts for the "some influence" cohort, largest first.
some_influence_edlevel = some_result_maker.get_value_df_by_column('EdLevel')
some_influence_edlevel

Unnamed: 0,EdLevel,count
1,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",91
2,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",51
6,Some college/university study without earning a degree,16
0,"Associate degree (A.A., A.S., etc.)",5
4,"Professional degree (JD, MD, Ph.D, Ed.D, etc.)",3
3,Primary/elementary school,2
5,"Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",2
7,Something else,2


In [300]:
# Age answer counts for the "great deal of influence" cohort, largest first.
high_influence_age = high_result_maker.get_value_df_by_column('Age')
high_influence_age

Unnamed: 0,Age,count
1,25-34 years old,60
2,35-44 years old,30
0,18-24 years old,18
3,45-54 years old,7
4,Under 18 years old,2


In [301]:
# Age answer counts for the "some influence" cohort, largest first.
some_influence_age = some_result_maker.get_value_df_by_column('Age')
some_influence_age

Unnamed: 0,Age,count
1,25-34 years old,96
2,35-44 years old,40
0,18-24 years old,31
3,45-54 years old,4
4,55-64 years old,1


In [302]:
# Share of each language among all language mentions in the
# "great deal of influence" cohort. The 'count' column holds percentages
# of the summed per-language counts, so the column adds up to 100.
high_influence_language = high_result_maker.get_percentages_df_by_column('LanguageHaveWorkedWith')
high_influence_language

Unnamed: 0,LanguageHaveWorkedWith,count
18,JavaScript,10.876623
29,Python,9.902597
15,HTML/CSS,9.74026
34,SQL,9.74026
38,TypeScript,7.954545
4,C#,5.681818
2,Bash/Shell (all shells),5.519481
26,PHP,5.194805
17,Java,4.383117
5,C++,3.733766
