In [6]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
# The survey answers live in a Google Sheets document; appending the export
# suffix to the document URL requests the sheet as CSV.
sheets_url = "https://docs.google.com/spreadsheets/d/1qHgROQBNqbQVRUeVdFmH2exEsVsgM9Uof9DnYbOzpo4"
suffix = "/export?format=csv"
comb_url = f"{sheets_url}{suffix}"

# Download the responses, parsing the submission timestamp and using it as the index.
raw_data = pd.read_csv(
    comb_url,
    index_col=["Timestamp"],
    parse_dates=["Timestamp"],
)

def _skill_label(column):
    """Return the text between the first "[" and the last "]" of ``column``.

    The skill questions arrive as "<question text> [<skill>]" and only the
    bracketed skill is wanted as the column name.  Locating the brackets
    (instead of slicing at a fixed character offset) keeps working if the
    question wording changes.  A name without a bracket pair is returned
    unchanged rather than being cut at an arbitrary position.
    """
    start = column.find("[")
    end = column.rfind("]")
    if start == -1 or end <= start:
        return column
    return column[start + 1 : end].strip()


# Old column name -> new column name, applied in a single rename below.
rename_map = {}

# Renamed skill columns, in their original order (the value columns).
val_cols = []

for column in raw_data.columns:
    if "skills" in column:
        # Skill question: keep only the bracketed skill name.
        label = _skill_label(column)
        rename_map[column] = label
        val_cols.append(label)
    elif "you experience" in column:
        # Type-of-experience question.
        rename_map[column] = "type_exp"
    elif "long" in column:
        # Years-of-experience question.
        rename_map[column] = "years_exp"
    # Any other column keeps its name.

# rename() returns a new frame, so raw_data itself is left untouched.
survey_data = raw_data.rename(columns=rename_map)

# Score assigned to each rank label; a skill left blank (NaN) scores 0.
survey_resp = {
    "Most important": 5,
    "Fifth": 1,
    "Fourth": 2,
    "Third": 3,
    "Second": 4,
    np.nan: 0,
}

# Look the labels up explicitly instead of going through DataFrame.replace:
# replace() leaves any unrecognised answer in place as a string and relies on
# implicit object -> numeric downcasting for the result dtype.
label_scores = {
    label: score for label, score in survey_resp.items() if isinstance(label, str)
}
answers = survey_data[val_cols]
scores = answers.apply(lambda col: col.map(label_scores))

# An answer that is present but has no score is a label this mapping does not
# know about; fail loudly rather than silently counting it as "not selected".
unmapped = (answers.notna() & scores.isna()).to_numpy()
if unmapped.any():
    unexpected = sorted({str(value) for value in answers.to_numpy()[unmapped]})
    raise ValueError(f"Unexpected survey answers in skill columns: {unexpected}")

# Blank answers become 0; float dtype matches the scores shown in the output below.
survey_data[val_cols] = scores.fillna(0).astype(float)

# A response is valid when at least one skill was ranked and no rank was given
# to more than one skill.  Bounding the row sum by 15 (= 5 + 4 + 3 + 2 + 1) is
# not enough on its own: two "Most important" answers sum to 10 and would pass.
# Every response accepted here also satisfies 0 < sum <= 15.
skill_scores = survey_data[val_cols]
rank_scores = sorted({score for score in survey_resp.values() if score > 0})

has_selection = skill_scores.gt(0).any(axis=1)
rank_used_once = np.logical_and.reduce(
    [skill_scores.eq(score).sum(axis=1).le(1).to_numpy() for score in rank_scores]
)
survey_data["valid"] = has_selection & rank_used_once

# Independent copy of the valid responses, so later edits to valid_resp neither
# touch survey_data nor raise SettingWithCopyWarning.
valid_resp = survey_data[survey_data["valid"]].copy()

In [10]:
# Preview the first five cleaned responses (scores, experience fields, validity flag).
survey_data.head(n=5)

Unnamed: 0_level_0,Statistical analysis and computing,Machine Learning,Deep Learning,Processing large data sets,Data Visualization,Data Wrangling,Mathematics,Statistics,General coding,Python,R,Business/domain skills,Collaboration,Public speaking,"Art, Music & Humanities",Natural Sciences & Environmental Sciences,Social Sciences,type_exp,years_exp,valid
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-10-10 21:09:04,5.0,0.0,0.0,0.0,5.0,0.0,0.0,5.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,5.0,I am/was a student of data science,0-3 years,False
2023-10-11 17:10:08,2.0,0.0,0.0,0.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,I work in data science,5-10 years,True
2023-10-11 23:18:02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,False
2023-10-18 19:23:04,5.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,I am/was a student of data science,0-3 years,True
2023-10-18 20:10:19,0.0,5.0,0.0,2.0,0.0,3.0,1.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,I work in data science,5-10 years,True


In [None]:
# NOTE: superseded - early manual renames, kept for reference only. The load cell
# derives the short skill column names in a loop; unlike that loop, these calls
# would keep the square brackets in the new names.
#survey_data.rename(columns={"What skills do you think are the most valuable in a data scientist? Select up to five. [Statistical analysis and computing]":"[Statistical analysis and computing]"}, inplace=True)
#survey_data.rename(columns={"What skills do you think are the most valuable in a data scientist? Select up to five. [Machine Learning]":"[Machine Learning]"}, inplace=True)
#survey_data.rename(columns={"What skills do you think are the most valuable in a data scientist? Select up to five. [Deep Learning]":"[Deep Learning]"}, inplace=True)
#survey_data.rename(columns={"What skills do you think are the most valuable in a data scientist? Select up to five. [Processing large data sets]":"[Processing large data sets]"}, inplace=True)

In [34]:
# Preview the untouched survey export (original question headers and text answers).
# The output recorded for this cell shows the raw frame; displaying raw_data
# reproduces it on a fresh Restart & Run All, whereas survey_data now holds the
# renamed, scored columns already shown above.
raw_data.head()

Unnamed: 0_level_0,What skills do you think are the most valuable in a data scientist? Select up to five. [Statistical analysis and computing],What skills do you think are the most valuable in a data scientist? Select up to five. [Machine Learning],What skills do you think are the most valuable in a data scientist? Select up to five. [Deep Learning],What skills do you think are the most valuable in a data scientist? Select up to five. [Processing large data sets],What skills do you think are the most valuable in a data scientist? Select up to five. [Data Visualization],What skills do you think are the most valuable in a data scientist? Select up to five. [Data Wrangling],What skills do you think are the most valuable in a data scientist? Select up to five. [Mathematics],What skills do you think are the most valuable in a data scientist? Select up to five. [Statistics],What skills do you think are the most valuable in a data scientist? Select up to five. [General coding],What skills do you think are the most valuable in a data scientist? Select up to five. [Python],What skills do you think are the most valuable in a data scientist? Select up to five. [R],What skills do you think are the most valuable in a data scientist? Select up to five. [Business/domain skills],What skills do you think are the most valuable in a data scientist? Select up to five. [Collaboration],What skills do you think are the most valuable in a data scientist? Select up to five. [Public speaking],"What skills do you think are the most valuable in a data scientist? Select up to five. [Art, Music & Humanities]",What skills do you think are the most valuable in a data scientist? Select up to five. [Natural Sciences & Environmental Sciences],What skills do you think are the most valuable in a data scientist? Select up to five. [Social Sciences],What is you experience with data science?,"How long do you have experience with data science? (Not learning, either teaching or working)"
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-10-10 21:09:04,Most important,,,,Most important,,,Most important,,Second,Second,,,,,,Most important,I am/was a student of data science,0-3 years
2023-10-11 17:10:08,Fourth,,,,Most important,Fifth,,,,,,,Second,Third,,,,I work in data science,5-10 years
2023-10-11 23:18:02,,,,,,,,,,,,,,,,,,,
2023-10-18 19:23:04,Most important,,,,,Second,,,,,,,,,,,,I am/was a student of data science,0-3 years
2023-10-18 20:10:19,,Most important,,Fourth,,Third,Fifth,,,Second,,,,,,,,I work in data science,5-10 years
