In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('./data/survey_results_public.csv')
df_schema = pd.read_csv('./data/survey_results_schema.csv')
pd.set_option('display.max_colwidth', None)

In [3]:
class ResultDfMaker:
    def __init__(self, df):
        self.df = df

    def count_values_by_column(self, column):
        return self.df[column].value_counts()

    def create_df_from_series(self, series):
        return series.reset_index()

    def give_columns(self, dataframe, column_one, column_two):
        dataframe.columns = (column_one, column_two)

    def fix_repeat(self, df, column):
        df[column] = df[column].str.split(';')
        df = df.explode(column)
        return df.groupby(column)['count'].sum().reset_index()

    def sort_values_by_column(self, df):
        return df.sort_values(by='count', ascending=False)

    def get_value_df_by_column(self, column):
        series = self.count_values_by_column(column)
        result_df = self.create_df_from_series(series)
        self.give_columns(result_df, column, 'count')
        result_df = self.fix_repeat(result_df, column)
        result_df = self.sort_values_by_column(result_df)
        return result_df        

In [4]:
# filtering according to country and purchase influence columns

high_influence_df = df[(df['Country'] == 'Iran, Islamic Republic of...') & (df['PurchaseInfluence'] == 'I have a great deal of influence')]

In [5]:
# filtering according to country and purchase influence columns

some_influence_df = df[(df['Country'] == 'Iran, Islamic Republic of...') & (df['PurchaseInfluence'] == 'I have some influence')]


In [6]:
high_result_maker = ResultDfMaker(high_influence_df)
some_result_maker = ResultDfMaker(some_influence_df)

In [7]:
high_influence_employment_df = high_result_maker.get_value_df_by_column('Employment')
high_influence_employment_df

Unnamed: 0,Employment,count
0,"Employed, full-time",80
3,"Independent contractor, freelancer, or self-employed",40
1,"Employed, part-time",10
2,I prefer not to say,2


In [8]:
some_influence_employment_df = some_result_maker.get_value_df_by_column('Employment')
some_influence_employment_df

Unnamed: 0,Employment,count
0,"Employed, full-time",150
3,"Independent contractor, freelancer, or self-employed",33
1,"Employed, part-time",10
2,I prefer not to say,1


In [9]:
high_influence_devtype = high_result_maker.get_value_df_by_column('DevType')
high_influence_devtype

Unnamed: 0,DevType,count
11,"Developer, full-stack",42
7,"Developer, back-end",28
19,Project manager,5
10,"Developer, front-end",4
8,"Developer, desktop or enterprise applications",4
16,Engineering manager,3
18,Product manager,3
3,Data scientist or machine learning specialist,2
22,"Senior Executive (C-Suite, VP, etc.)",2
21,Security professional,2


In [10]:
some_influence_devtype = some_result_maker.get_value_df_by_column('DevType')
some_influence_devtype

Unnamed: 0,DevType,count
7,"Developer, back-end",43
11,"Developer, full-stack",42
10,"Developer, front-end",23
13,"Developer, mobile",16
8,"Developer, desktop or enterprise applications",14
3,Data scientist or machine learning specialist,7
9,"Developer, embedded applications or devices",7
17,Other (please specify):,2
4,DevOps specialist,2
20,Scientist,2


In [11]:
high_influence_yearscodeprp = high_result_maker.get_value_df_by_column('YearsCodePro')
high_influence_yearscodepro

NameError: name 'high_influence_yearscodepro' is not defined

In [None]:
some_influence_yearscodeprp = some_result_maker.get_value_df_by_column('YearsCodePro')
some_influence_yearscodepro

In [None]:
high_influence_edlevel = high_result_maker.get_value_df_by_column('EdLevel')
high_influence_edlevel

In [None]:
some_influence_edlevel = some_result_maker.get_value_df_by_column('EdLevel')
some_influence_edlevel

In [None]:
high_influence_age = high_result_maker.get_value_df_by_column('Age')
high_influence_age

In [None]:
some_influence_age = some_result_maker.get_value_df_by_column('Age')
some_influence_age