In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

In [6]:
df = pd.read_csv('./developer_survey_2020/survey_results_public.csv')
schema_df = pd.read_csv('./developer_survey_2020/survey_results_schema.csv')

In [7]:
df.head()

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0


In [8]:
schema_df.head()

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,MainBranch,Which of the following options best describes ...
2,Hobbyist,Do you code as a hobby?
3,Age,What is your age (in years)? If you prefer not...
4,Age1stCode,At what age did you write your first line of c...


# What are the most popular tools used by developers in 2020?

In [81]:
df['LanguageWorkedWith']

0                                   C#;HTML/CSS;JavaScript
1                                         JavaScript;Swift
2                                 Objective-C;Python;Swift
3                                                      NaN
4                                        HTML/CSS;Ruby;SQL
                               ...                        
64456                                                  NaN
64457    Assembly;Bash/Shell/PowerShell;C;C#;C++;Dart;G...
64458                                                  NaN
64459                                             HTML/CSS
64460                      C#;HTML/CSS;Java;JavaScript;SQL
Name: LanguageWorkedWith, Length: 64461, dtype: object

In [33]:
languages = ['Assembly', 'Bash/Shell/PowerShell', 'C','C#','C++','Dart' ,'Go', 'Haskell',
             'HTML/CSS', 'Java', 'JavaScript', 'Julia','Kotlin', 'Objective-C', 'Perl',
             'PHP', 'Python', 'R']

In [79]:
def clean_counts(df, col, values, col1, col2 = 'counts'):
    
    raw_df = df[col].value_counts().reset_index()
    clean_df = defaultdict(int)
    
    for val in values:    
        for index, row in raw_df.iterrows():
            if val in list(row)[0]:
                clean_df[val] += int(list(row)[1])
    
    clean_df = pd.DataFrame(pd.Series(clean_df)).reset_index()
    clean_df.columns = [col1, col2]
    clean_df = clean_df.sort_values(col2, ascending= False).reset_index(drop=True)
    
    return clean_df
    
            

In [80]:
clean_counts(df,'LanguageWorkedWith', languages, 'language')

Unnamed: 0,language,counts
0,C,46769
1,Java,45749
2,JavaScript,38822
3,HTML/CSS,36181
4,Python,25287
5,Bash/Shell/PowerShell,18980
6,C#,18041
7,PHP,15007
8,C++,13707
9,R,9293


In [85]:
clean_counts(df,'LanguageDesireNextYear', languages, 'language')

Unnamed: 0,language,counts
0,C,33645
1,Java,31671
2,Python,26682
3,JavaScript,26188
4,HTML/CSS,20771
5,R,16649
6,C#,13674
7,Go,12605
8,Bash/Shell/PowerShell,11728
9,C++,9756


# What are the most popular database environments used in 2020?

In [82]:
databases = ['Cassandra', 'Couchbase', 'DynamoDB', 'Elasticsearch', 'Firebase',
             'IBM DB2', 'MariaDB', 'Microsoft', 'SQL Server', 'MongoDB', 'MySQL', 
             'Oracle']

In [83]:
clean_counts(df,'DatabaseWorkedWith', databases, 'database')

Unnamed: 0,language,counts
0,MySQL,27559
1,Microsoft,16336
2,SQL Server,16336
3,MongoDB,13086
4,MariaDB,8312
5,Oracle,8155
6,Firebase,7128
7,Elasticsearch,6817
8,DynamoDB,3497
9,Cassandra,1654


# What are the most desired databases for next year?

In [84]:
clean_counts(df,'DatabaseDesireNextYear', databases, 'database')

Unnamed: 0,language,counts
0,MongoDB,16024
1,MySQL,15734
2,Elasticsearch,10269
3,Microsoft,9876
4,SQL Server,9876
5,Firebase,8600
6,MariaDB,6126
7,Oracle,4794
8,DynamoDB,4773
9,Cassandra,4227


# What are the most used platforms?

In [91]:
df['PlatformDesireNextYear']

0           Android;iOS;Kubernetes;Microsoft Azure;Windows
1                               iOS;Kubernetes;Linux;MacOS
2                                                      NaN
3                                                      NaN
4        Docker;Google Cloud Platform;Heroku;Linux;Windows
                               ...                        
64456                                                  NaN
64457                                                  NaN
64458                                                  NaN
64459                                                  NaN
64460                   Arduino;Linux;Raspberry Pi;Windows
Name: PlatformDesireNextYear, Length: 64461, dtype: object

In [88]:
platforms = ['Android','Arduino', 'AWS', 'Docker', 'Google Cloud Platform', 'Heroku', 
             'IBM Cloud or Watson iOS', 'Kubernetes', 'Linux', 'MacOS', 'Microsoft Azure', 
             'Raspberry Pi', 'Slack Apps and Integrations Windows'] 

In [92]:
clean_counts(df,'PlatformWorkedWith', platforms, 'platforms')

Unnamed: 0,platforms,counts
0,Linux,29600
1,Docker,18851
2,AWS,14389
3,Android,14101
4,MacOS,12898
5,Raspberry Pi,8010
6,Microsoft Azure,7830
7,Google Cloud Platform,7569
8,Kubernetes,6178
9,Heroku,5974


In [93]:
clean_counts(df,'PlatformDesireNextYear', platforms, 'platforms')

Unnamed: 0,platforms,counts
0,Linux,27475
1,Docker,23458
2,AWS,18381
3,Android,15085
4,Kubernetes,14009
5,MacOS,11793
6,Google Cloud Platform,11648
7,Raspberry Pi,11614
8,Microsoft Azure,9816
9,Arduino,6895


In [None]:
if 'data' or 'Data'in 