# Data Processing

In [1]:
import re
from collections import Counter

import pandas as pd

Load survey data and select our desired columns

In [2]:
cols = ['Country', 'WebframeHaveWorkedWith', 'WebframeWantToWorkWith']

# Read only the columns of interest, then keep the rows where both
# framework columns are filled in (thresh=2 over the two-column subset).
survey = (
	pd.read_csv('../data-raw/survey_results_public.csv', usecols=cols)
	.dropna(thresh=2, subset=cols[1:])
)

survey.head(5)

Unnamed: 0,Country,WebframeHaveWorkedWith,WebframeWantToWorkWith
2,United Kingdom of Great Britain and Northern I...,Angular.js,Angular;Angular.js
3,Israel,ASP.NET;ASP.NET Core,ASP.NET;ASP.NET Core
4,United States of America,Angular;ASP.NET;ASP.NET Core ;jQuery;Node.js,Angular;ASP.NET Core ;Blazor;Node.js
6,India,Angular;Next.js;Node.js;React.js;Svelte;Vue.js,Django;Flask;Gatsby;jQuery;Next.js;Node.js;Rea...
7,India,jQuery;Node.js,Angular;Angular.js;Next.js;Vue.js


Split the semicolon-delimited framework columns into lists

In [3]:
def split_column(value):
	"""Break a semicolon-separated string into a list of trimmed tokens.

	Whitespace surrounding each token is discarded. Empty tokens are kept,
	so an empty string yields ``['']``.
	"""
	return [token.strip() for token in value.split(';')]

# Turn each semicolon-delimited framework string into a list of names.
for column in ('WebframeHaveWorkedWith', 'WebframeWantToWorkWith'):
	survey[column] = survey[column].map(split_column)

survey.head(3)

Unnamed: 0,Country,WebframeHaveWorkedWith,WebframeWantToWorkWith
2,United Kingdom of Great Britain and Northern I...,[Angular.js],"[Angular, Angular.js]"
3,Israel,"[ASP.NET, ASP.NET Core]","[ASP.NET, ASP.NET Core]"
4,United States of America,"[Angular, ASP.NET, ASP.NET Core, jQuery, Node.js]","[Angular, ASP.NET Core, Blazor, Node.js]"


Aggregate framework values into new series

In [4]:
def aggregate(col):
	"""Count how often each value occurs across a column of lists.

	Parameters
	----------
	col : iterable of iterables
		Each element holds one row's values (here, the lists of framework
		names produced by ``split_column``).

	Returns
	-------
	pd.Series
		Integer counts indexed by value, in order of first appearance.
		An empty input yields an empty integer Series.
	"""
	# Counter tallies everything in a single pass over plain Python objects,
	# instead of a pandas index lookup and label assignment for every value.
	counts = Counter(data for row in col for data in row)
	return pd.Series(counts, dtype=int)

# Tally framework mentions separately for the "have worked with" and
# "want to work with" answers.
frameworks_used, frameworks_future = (
	aggregate(survey[column])
	for column in ('WebframeHaveWorkedWith', 'WebframeWantToWorkWith')
)

Combine series into new frameworks dataset

In [5]:
# Stack the two count series as rows, flip so each framework is a row,
# then label the two resulting columns.
frameworks = (
	pd.DataFrame([frameworks_used, frameworks_future])
	.T
	.set_axis(['frameworks used', 'frameworks future'], axis='columns')
)

frameworks.head(5)

Unnamed: 0,frameworks used,frameworks future
Angular.js,4286,2773
ASP.NET,7206,3848
ASP.NET Core,9177,8894
Angular,9850,8402
jQuery,13779,5937


Load the tags dataset and derive the answered and solved post counts

In [6]:
# Counts in the file use ',' as a thousands separator; the first column
# becomes the index.
tags = pd.read_csv('../data-raw/tags.csv', thousands=',', index_col=0)

# Derive the complementary counts from the totals.
tags = tags.assign(**{
	'answered posts': tags['total posts'] - tags['unanswered posts'],
	'solved posts': tags['total posts'] - tags['unsolved posts'],
})

tags.head(5)

Unnamed: 0_level_0,total posts,unanswered posts,unsolved posts,answered posts,solved posts
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Node.js,446823,86743,174210,360080,272613
React.js,424124,85670,173271,338454,250853
jQuery,1030603,101046,278193,929557,752410
Express,89495,17320,36176,72175,53319
Angular,286350,51845,111456,234505,174894


Join languages and tags datasets to frameworks

In [7]:
# Language lookup table; the first CSV column becomes the index.
languages = pd.read_csv('../data-raw/languages.csv', index_col=0)

# Index-on-index join of the survey counts with both lookup tables.
# how='inner' keeps only the labels present in all three indexes.
# NOTE: this rebinds `frameworks`, so re-run the cell that builds it
# before re-running this one.
frameworks = frameworks.join([languages, tags], how='inner')

frameworks.head(5)

Unnamed: 0,frameworks used,frameworks future,language,total posts,unanswered posts,unsolved posts,answered posts,solved posts
Angular.js,4286,2773,JavaScript,262592,37781,90975,224811,171617
ASP.NET,7206,3848,C#,370740,43590,112937,327150,257803
ASP.NET Core,9177,8894,C#,73556,13878,26751,59678,46805
Angular,9850,8402,JavaScript,286350,51845,111456,234505,174894
jQuery,13779,5937,JavaScript,1030603,101046,278193,929557,752410


Finally, write the combined dataset to a new file to be used by the dashboard

In [8]:
# Move the framework names out of the index into a 'framework' column.
# The guard keeps this cell re-runnable: once the column exists, a second
# reset_index(names=['framework']) would raise ValueError because the
# column cannot be inserted twice.
if 'framework' not in frameworks.columns:
	frameworks = frameworks.reset_index(level=0, names=['framework'])

# to_csv keeps its default index=True, so the fresh integer index is
# written as an unnamed first column.
frameworks.to_csv('../data/frameworks.csv')
