<a href="https://colab.research.google.com/github/chloepar/Stack_Overflow_Survey_Exploration_DSBA6211/blob/main/6211_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests, zipfile, io, time, os, logging

In [2]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36'
}

### Load surveys for 2011 - 2015, 2017 - 2025 (2016 has a different folder format)

In [3]:

def find_csv_in_dir(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.csv'):
                return os.path.join(root, file)
    raise FileNotFoundError(f"No CSV found in {directory}")

# Define available years
available_years = range(2011, 2026)
dfs = {}

# Retry settings
max_retries = 10
base_delay = 30  # seconds

for year in available_years:
    url = f'https://survey.stackoverflow.co/datasets/stack-overflow-developer-survey-{year}.zip'
    extract_path = f'/content/survey_{year}'
    print(f"\n📦 Processing {year}...")

    for attempt in range(1, max_retries + 1):
        try:
            r = requests.get(url)
            content_type = r.headers.get('Content-Type', '')

            if r.status_code == 429:
                retry_after = int(r.headers.get('Retry-After', base_delay))
                print(f"⏳ Rate limited. Waiting {retry_after} seconds before retrying...")
                time.sleep(retry_after)
                continue

            if r.status_code == 200 and 'zip' in content_type:
                z = zipfile.ZipFile(io.BytesIO(r.content))
                z.extractall(extract_path)

                csv_path = find_csv_in_dir(extract_path)
                df = pd.read_csv(csv_path, encoding='ISO-8859-1')
                df['year'] = year  # Add the year column
                dfs[year] = df
                print(f"✅ Loaded {year} survey with {len(df)} rows.")
                break  # success, exit retry loop
            else:
                print(f"❌ Failed for {year}: Status {r.status_code}, Content-Type {content_type}")
                break  # don't retry non-429 failures

        except zipfile.BadZipFile:
            print(f"⚠️ Invalid ZIP file for {year}. Attempt {attempt}/{max_retries}")
        except Exception as e:
            print(f"⚠️ Error on attempt {attempt}/{max_retries} for {year}: {e}")

        # Exponential backoff before next retry
        wait = base_delay * (2 ** (attempt - 1))
        print(f"🔁 Retrying in {wait} seconds...")
        time.sleep(wait)



📦 Processing 2011...
✅ Loaded 2011 survey with 2814 rows.

📦 Processing 2012...
✅ Loaded 2012 survey with 6244 rows.

📦 Processing 2013...


  df = pd.read_csv(csv_path, encoding='ISO-8859-1')


✅ Loaded 2013 survey with 9743 rows.

📦 Processing 2014...
✅ Loaded 2014 survey with 7644 rows.

📦 Processing 2015...


  df = pd.read_csv(csv_path, encoding='ISO-8859-1')


✅ Loaded 2015 survey with 26087 rows.

📦 Processing 2016...
✅ Loaded 2016 survey with 56030 rows.

📦 Processing 2017...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
✅ Loaded 2017 survey with 51392 rows.

📦 Processing 2018...


  df = pd.read_csv(csv_path, encoding='ISO-8859-1')


✅ Loaded 2018 survey with 98855 rows.

📦 Processing 2019...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
✅ Loaded 2019 survey with 88883 rows.

📦 Processing 2020...
✅ Loaded 2020 survey with 64461 rows.

📦 Processing 2021...
✅ Loaded 2021 survey with 83439 rows.

📦 Processing 2022...
✅ Loaded 2022 survey with 73268 rows.

📦 Processing 2023...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
✅ Loaded 2023 survey with 89184 rows.

📦 Processing 2024...
✅ Loaded 2024 survey with 65437 rows.

📦 Processing 2025...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
⏳ Rate limited. Waiting 30 seconds before retrying...
✅ Loaded 2025 survey with 49123 rows.


### Extract 2016

In [4]:
# Setup logging
logging.basicConfig(level=logging.INFO)

def download_and_extract_zip(url, headers, extract_path, max_retries=5, delay=30):
    for attempt in range(1, max_retries + 1):
        try:
            logging.info(f"Attempt {attempt}: Downloading {url}")
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()  # Raise HTTPError for bad responses
            with zipfile.ZipFile(io.BytesIO(response.content)) as z:
                z.extractall(extract_path)
            logging.info("Download and extraction successful.")
            return True
        except (requests.RequestException, zipfile.BadZipFile) as e:
            logging.warning(f"Attempt {attempt} failed: {e}")
            if attempt < max_retries:
                time.sleep(delay)
            else:
                logging.error("All attempts failed.")
                return False

# Parameters
url_2016 = 'https://survey.stackoverflow.co/datasets/stack-overflow-developer-survey-2016.zip'
extract_path_2016 = '/content/survey_2016'

# Run download
success = download_and_extract_zip(url_2016, headers, extract_path_2016)

# Load CSV if successful
if success:
    csv_path = '/content/survey_2016/2016 Stack Overflow Survey Results/2016 Stack Overflow Survey Responses.csv'
    try:
        df_2016 = pd.read_csv(csv_path, encoding='ISO-8859-1')
        df_2016['year'] = '2016'  # Add the year column
        logging.info("CSV loaded successfully.")
    except Exception as e:
        logging.error(f"Failed to load CSV: {e}")
else:
    df_2016 = None




### Extract dataframes for each year

In [5]:
for year, df in dfs.items():
    globals()[f'df_{year}'] = df

### Combine all dataframes from 2011 to 2025


In [6]:
import re

def drop_unnamed_columns(df):
    """Drops columns that start with 'Unnamed:' from a DataFrame."""
    cols_to_drop = [col for col in df.columns if re.match(r'Unnamed: \d+', col)]
    if cols_to_drop:
        print(f"Dropping columns: {cols_to_drop}")
        df = df.drop(columns=cols_to_drop)
    return df

# Drop unnamed columns from each DataFrame in the dictionary
for year, df in dfs.items():
    dfs[year] = drop_unnamed_columns(df)
    print(f"DataFrame for year {year} after dropping unnamed columns:")
    display(dfs[year].head())

# Drop unnamed columns from the 2016 DataFrame
df_2016 = drop_unnamed_columns(df_2016)
print("DataFrame for year 2016 after dropping unnamed columns:")
display(df_2016.head())

Dropping columns: ['Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62']
DataFrame for year 2011 after dropping unnamed columns:


Unnamed: 0,What Country or Region do you live in?,Which US State or Territory do you live in?,How old are you?,How many years of IT/Programming experience do you have?,How would you best describe the industry you work in?,Which best describes the size of your company?,Which of the following best describes your occupation?,How likely is it that a recommendation you make will be acted upon?,What is your involvement in purchasing? You can choose more than 1.,What types of purchases are you involved in?,"What is your budget for outside expenditures (hardware, software, consulting, etc) for 2011?",What type of project are you developing?,Which languages are you proficient in?,What operating system do you use the most?,Please rate your job/career satisfaction,"Including bonus, what is your annual compensation in USD?",Which technology products do you own? (You can choose more than one),"In the last 12 months, how much money have you spent on personal technology-related purchases?",Which of our sites do you frequent most?,year
0,Response,Response,Response,Response,Response,Response,Response,Response,Influencer,Hardware,"<$10,000",Response,Java,Response,Response,Response,iPhone,Response,Response,2011
1,Africa,,< 20,<2,Consulting,Start Up (1-25),Web Application Developer,Not in a million years,,,,Mobile,,Linux,FML,Student / Unemployed,iPhone,<$100,,2011
2,Other Europe,,25-29,41310,Software Products,Mature Small Business (25-100),Server Programmer,It's been known to happen,,,,Enterprise,Java,Windows 7,So happy it hurts,,iPhone,$251-$500,Stack Overflow,2011
3,India,,25-29,41435,Software Products,Mid Sized (100-999),Server Programmer,Unless it's stoopid it gets done,,,,SaaS,Java,Linux,,,,,,2011
4,Germany,,< 20,41310,Foundation / Non-Profit,Student,Student,It's been known to happen,,,"<$10,000",Other,Java,Linux,I enjoy going to work,Student / Unemployed,,"$501-$1,000",Stack Overflow,2011


Dropping columns: ['Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 43', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 74']
DataFrame for year 2012 after dropping unnamed columns:


Unnamed: 0,What Country or Region do you live in?,Which US State or Territory do you live in?,How old are you?,How many years of IT/Programming experience do you have?,How would you best describe the industry you currently work in?,Which best describes the size of your company?,Which of the following best describes your occupation?,What is your involvement in purchasing products or services for the company you work for? (You can choose more than one),What types of purchases are you involved in?,"What is your budget for outside expenditures (hardware, software, consulting, etc) for 2011?",...,Have you visited / Are you aware of Stack Overflow Careers?,Do you have a Stack Overflow Careers Profile?,"You answered you don't have a Careers profile, can you elaborate why?",Which technology products do you own? (You can choose more than one),"In the last 12 months, how much money have you spent on personal technology-related purchases?",Please rate the advertising you've seen on Stack Overflow,What advertisers do you remember seeing on Stack Overflow?,What is your current Stack Overflow reputation?,Which of our sites do you frequent most?,year
0,Response,Response,Response,Response,Response,Response,Response,Influencer,Hardware,Response,...,Response,Response,Response,iPhone,Response,The ads are relevant,Open-Ended Response,Response,Response,2012
1,India,,20-24,<2,Consulting,"Fortune 1000 (1,000+)",Server Programmer,Influencer,Hardware,"<$10,000",...,Yes,"No, but I want one! (Please provide email and ...",,iPhone,"$501-$1,000",,,,,2012
2,Germany,,25-29,<2,Other,Mature Small Business (25-100),Embedded Application Developer,,,Don't know,...,No,No thank you,Thought Careers site was just about finding a ...,,$100-$250,Neutral,,Don't have an account,Stack Overflow,2012
3,United Kingdom,,20-24,41070,Finance / Banking,Mature Small Business (25-100),Web Application Developer,Influencer,Hardware,"$25,001 - $40,000",...,Yes,No thank you,Thought Careers site was just about finding a ...,iPhone,"$1,001-$2,000",Neutral,,1,Stack Overflow,2012
4,France,,20-24,40944,Software Products,Mature Small Business (25-100),Embedded Application Developer,,,,...,Yes,"No, but I want one! (Please provide email and ...",,,"$1,001-$2,000",Agree,None !,Don't have an account,Stack Overflow,2012


Dropping columns: ['Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68', 'Unnamed: 69', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 83', 'Unnamed: 84', 'Unnam

Unnamed: 0,What Country or Region do you live in?,Which US State or Territory do you live in?,How old are you?,How many years of IT/Programming experience do you have?,How would you best describe the industry you currently work in?,How many people work for your company?,Which of the following best describes your occupation?,"Including yourself, how many developers are employed at your company?",How large is the team that you work on?,What other departments / roles do you interact with regularly?,...,Have you changed jobs in the last 12 months?,What best describes your career / job satisfaction?,"Including bonus, what is your annual compensation in USD?",Which technology products do you own? (You can choose more than one),"In the last 12 months, how much money have you spent on personal technology-related purchases?",Please rate the advertising you've seen on Stack Overflow,What advertisers do you remember seeing on Stack Overflow?,What is your current Stack Overflow reputation?,How do you use Stack Overflow?,year
0,Response,Response,Response,Response,Response,Response,Response,Response,Response,System Administrators,...,Response,Response,Response,iPhone,Response,The ads are relevant,Open-Ended Response,Response,Read other people's questions to solve my prob...,2013
1,United Kingdom,,35-39,6/10/2013,Finance / Banking,101-999,Enterprise Level Services,100,4/8/2013,System Administrators,...,No,It's a paycheck,"$80,000 - $100,000",iPhone,$100-$250,Neutral,,Don't have an account,Read other people's questions to solve my prob...,2013
2,United States of America,Oregon,25-29,6/10/2013,Retail,101-999,Back-End Web Developer,6/15/2013,4/8/2013,System Administrators,...,No,It's a paycheck,"$20,000 - $40,000",,$251-$500,Disagree,"StackOverflow themselves, Careers 2.0 (SO also...",1,Read other people's questions to solve my prob...,2013
3,United States of America,Wisconsin,51-60,11,Software Products,26-100,Enterprise Level Services,6/15/2013,Just me!,System Administrators,...,No,I'm not happy in my job,"$120,000 - $140,000",,">$3,000",Neutral,don't recall seeing ads on Stack Overflow,Don't have an account,Read other people's questions to solve my prob...,2013
4,Germany,,,,,,,,,,...,,,,,,,,,,2013


Dropping columns: ['Unnamed: 1', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72', 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76', 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80', 'Unnamed: 81', 'Unnamed: 84', 'Unnamed: 88', 'Unnamed: 89', 'Unnamed: 90', 'Unname

Unnamed: 0,What Country do you live in?,Which US State or Territory do you live in?,How old are you?,What is your gender?,How many years of IT/Programming experience do you have?,Which of the following best describes your occupation?,"Including bonus, what is your annual compensation in USD?",How would you best describe the industry you currently work in?,How many developers are employed at your company?,Do you work remotely?,...,Have you visited / Are you aware of Stack Overflow Careers 2.0?,Do you have a Stack Overflow Careers 2.0 Profile?,Please rate the advertising you've seen on Stack Overflow,Were you aware of the Apptivate contest?,Did you participate in the Apptivate contest?,What advertisers do you remember seeing on Stack Overflow?,What is your current Stack Overflow reputation?,How do you use Stack Overflow?,How often do you find solutions to your programming problems on Stack Overflow without asking a new question?,year
0,Response,Response,Response,Response,Response,Response,Response,Response,Response,Response,...,Response,Response,The ads are relevant,Response,Response,Open-Ended Response,Response,Read other people's questions to solve my prob...,Response,2014
1,India,,30-34,Female,6/10/2014,Back-End Web Developer,"$20,000 - $40,000",Finance / Banking,100,Occasionally,...,No,Yes,Strongly Agree,No,No,,500,Read other people's questions to solve my prob...,Almost Always,2014
2,Thailand,,20-24,Male,<2,Back-End Web Developer,Student / Unemployed,Healthcare,,Never,...,No,No thank you,Neutral,,,,Don't have an account,Read other people's questions to solve my prob...,,2014
3,Iran,,25-29,Male,6/10/2014,Desktop Software Developer,"<$20,000",Not Currently Employed,1/5/2014,Occasionally,...,No,No thank you,Agree,No,No,Tehcodez,1,Read other people's questions to solve my prob...,Almost Always,2014
4,Ukraine,,< 20,Male,<2,Student,Student / Unemployed,Student,,Never,...,No,No thank you,Strongly Agree,,,,50,Read other people's questions to solve my prob...,Almost Always,2014


Dropping columns: ['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 

Unnamed: 0,Select all that apply,Select all that apply.1,Select all that apply.2,Select up to 3,Select up to 3.1,Select up to 3.2,Select all that apply.3,Select up to 3.3,Select all that apply.4,Select all that apply.5,Select all that apply.6,Select all that apply.7,year
0,Current Lang & Tech: Android,Future Lang & Tech: Android,Training & Education: No formal training,Most important aspect of new job opportunity: ...,Most annoying about job search: Finding time,Appealing message traits: Message is personalized,Most urgent info about job opportunity: Salary,How can companies improve interview process: M...,Why try Stack Overflow Careers: No spam,Source control used: Git,Why use Stack Overflow: Help for job,Why answer: Help a programmer in need,2015
1,,,,,Finding time to job search,Message is personalized,Salary,Show meÃ¦more live code,,,To get help for my job,,2015
2,,,,Salary,,Message is personalized,,,,Git,To get help for my job,,2015
3,,,,,Finding time to job search,,,,,,,,2015
4,,,,Salary,Finding time to job search,Message is personalized,Salary,Show meÃ¦more live code,,Git,To get help for my job,It feels good to help a programmer in need,2015


Dropping columns: ['Unnamed: 0']
DataFrame for year 2016 after dropping unnamed columns:


Unnamed: 0,collector,country,un_subregion,so_region,age_range,age_midpoint,gender,self_identification,occupation,occupation_group,...,important_newtech,important_buildnew,important_buildexisting,important_promotion,important_companymission,important_wfh,important_ownoffice,developer_challenges,why_stack_overflow,year
0,Facebook,Afghanistan,Southern Asia,Central Asia,20-24,22.0,Male,Programmer,,,...,,,,,,,,,,2016
1,Facebook,Afghanistan,Southern Asia,Central Asia,30-34,32.0,Male,Developer; Engineer; Programmer,Mobile developer - iOS,"Mobile Dev (Android, iOS, WP & Multi-Platform)",...,This is very important,This is very important,I don't care about this,This is somewhat important,This is somewhat important,I don't care about this,I don't care about this,Unrealistic expectations; Interacting with non...,To get help for my job; Because I can't do my ...,2016
2,Facebook,Afghanistan,Southern Asia,Central Asia,,,,,,,...,,,,,,,,,,2016
3,Facebook,Afghanistan,Southern Asia,Central Asia,,,Female,Engineer,DevOps,DevOps,...,This is somewhat important,This is very important,This is very important,I don't care about this,I don't care about this,This is very important,This is somewhat important,Corporate policies; Interacting with non-techn...,To get help for my job; Beacause I love to learn,2016
4,Facebook,Afghanistan,Southern Asia,Central Asia,> 60,65.0,Prefer not to disclose,Developer; Engineer; Programmer; Sr. Developer...,,,...,,,,,,,,,,2016


DataFrame for year 2017 after dropping unnamed columns:


Unnamed: 0,Respondent,Professional,ProgramHobby,Country,University,EmploymentStatus,FormalEducation,MajorUndergrad,HomeRemote,CompanySize,...,Gender,HighestEducationParents,Race,SurveyLong,QuestionsInteresting,QuestionsConfusing,InterestedAnswers,Salary,ExpectedSalary,year
0,1,Student,"Yes, both",United States,No,"Not employed, and not looking for work",Secondary school,,,,...,Male,High school,White or of European descent,Strongly disagree,Strongly agree,Disagree,Strongly agree,,,2017
1,2,Student,"Yes, both",United Kingdom,"Yes, full-time",Employed part-time,Some college/university study without earning ...,Computer science or software engineering,"More than half, but not all, the time",20 to 99 employees,...,Male,A master's degree,White or of European descent,Somewhat agree,Somewhat agree,Disagree,Strongly agree,,37500.0,2017
2,3,Professional developer,"Yes, both",United Kingdom,No,Employed full-time,Bachelor's degree,Computer science or software engineering,"Less than half the time, but at least one day ...","10,000 or more employees",...,Male,A professional degree,White or of European descent,Somewhat agree,Agree,Disagree,Agree,113750.0,,2017
3,4,Professional non-developer who sometimes write...,"Yes, both",United States,No,Employed full-time,Doctoral degree,A non-computer-focused engineering discipline,"Less than half the time, but at least one day ...","10,000 or more employees",...,Male,A doctoral degree,White or of European descent,Agree,Agree,Somewhat agree,Strongly agree,,,2017
4,5,Professional developer,"Yes, I program as a hobby",Switzerland,No,Employed full-time,Master's degree,Computer science or software engineering,Never,10 to 19 employees,...,,,,,,,,,,2017


DataFrame for year 2018 after dropping unnamed columns:


Unnamed: 0,Respondent,Hobby,OpenSource,Country,Student,Employment,FormalEducation,UndergradMajor,CompanySize,DevType,...,Gender,SexualOrientation,EducationParents,RaceEthnicity,Age,Dependents,MilitaryUS,SurveyTooLong,SurveyEasy,year
0,1,Yes,No,Kenya,No,Employed part-time,"Bachelorâs degree (BA, BS, B.Eng., etc.)",Mathematics or statistics,20 to 99 employees,Full-stack developer,...,Male,Straight or heterosexual,"Bachelorâs degree (BA, BS, B.Eng., etc.)",Black or of African descent,25 - 34 years old,Yes,,The survey was an appropriate length,Very easy,2018
1,3,Yes,Yes,United Kingdom,No,Employed full-time,"Bachelorâs degree (BA, BS, B.Eng., etc.)","A natural science (ex. biology, chemistry, phy...","10,000 or more employees",Database administrator;DevOps specialist;Full-...,...,Male,Straight or heterosexual,"Bachelorâs degree (BA, BS, B.Eng., etc.)",White or of European descent,35 - 44 years old,Yes,,The survey was an appropriate length,Somewhat easy,2018
2,4,Yes,Yes,United States,No,Employed full-time,Associate degree,"Computer science, computer engineering, or sof...",20 to 99 employees,Engineering manager;Full-stack developer,...,,,,,,,,,,2018
3,5,No,No,United States,No,Employed full-time,"Bachelorâs degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",100 to 499 employees,Full-stack developer,...,Male,Straight or heterosexual,Some college/university study without earning ...,White or of European descent,35 - 44 years old,No,No,The survey was an appropriate length,Somewhat easy,2018
4,7,Yes,No,South Africa,"Yes, part-time",Employed full-time,Some college/university study without earning ...,"Computer science, computer engineering, or sof...","10,000 or more employees",Data or business analyst;Desktop or enterprise...,...,Male,Straight or heterosexual,Some college/university study without earning ...,White or of European descent,18 - 24 years old,Yes,,The survey was an appropriate length,Somewhat easy,2018


DataFrame for year 2019 after dropping unnamed columns:


Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase,year
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,...,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult,2019
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,...,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult,2019
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelorâs degree (BA, BS, B.Eng., etc.)",Web development or web design,...,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult,2019
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelorâs degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy,2019
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelorâs degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy,2019


DataFrame for year 2020 after dropping unnamed columns:


Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro,year
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0,2020
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0,2020
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,,2020
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0,2020
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0,2020


DataFrame for year 2021 after dropping unnamed columns:


Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly,year
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0,2021
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,,2021
2,3,"I am not primarily a developer, but I write co...","Student, full-time",Russian Federation,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,...,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,,2021
3,4,I am a developer by profession,Employed full-time,Austria,,,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,...,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,,2021
4,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",United Kingdom of Great Britain and Northern I...,,England,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0,...,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,,2021


DataFrame for year 2022 after dropping unnamed columns:


Unnamed: 0,ResponseId,MainBranch,Employment,RemoteWork,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,LearnCodeCoursesCert,YearsCode,...,TimeAnswering,Onboarding,ProfessionalTech,TrueFalse_1,TrueFalse_2,TrueFalse_3,SurveyLength,SurveyEase,ConvertedCompYearly,year
0,1,None of these,,,,,,,,,...,,,,,,,,,,2022
1,2,I am a developer by profession,"Employed, full-time",Fully remote,Hobby;Contribute to open-source projects,,,,,,...,,,,,,,Too long,Difficult,,2022
2,3,"I am not primarily a developer, but I write co...","Employed, full-time","Hybrid (some remote, some in-person)",Hobby,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",Books / Physical media;Friend or family member...,Technical documentation;Blogs;Programming Game...,,14.0,...,,,,,,,Appropriate in length,Neither easy nor difficult,40205.0,2022
3,4,I am a developer by profession,"Employed, full-time",Fully remote,I donât code outside of work,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)","Books / Physical media;School (i.e., Universit...",,,20.0,...,,,,,,,Appropriate in length,Easy,215232.0,2022
4,5,I am a developer by profession,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)","Other online resources (e.g., videos, blogs, f...",Technical documentation;Blogs;Stack Overflow;O...,,8.0,...,,,,,,,Too long,Easy,,2022


DataFrame for year 2023 after dropping unnamed columns:


Unnamed: 0,ResponseId,Q120,MainBranch,Age,Employment,RemoteWork,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,...,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,ProfessionalTech,Industry,SurveyLength,SurveyEase,ConvertedCompYearly,year
0,1,I agree,None of these,18-24 years old,,,,,,,...,,,,,,,,,,2023
1,2,I agree,I am a developer by profession,25-34 years old,"Employed, full-time",Remote,Hobby;Contribute to open-source projects;Boots...,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;Friend or fam...,Formal documentation provided by the owner of ...,...,10+ times a week,Never,15-30 minutes a day,15-30 minutes a day,DevOps function;Microservices;Automated testin...,"Information Services, IT, Software Development...",Appropriate in length,Easy,285000.0,2023
2,3,I agree,I am a developer by profession,45-54 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby;Professional development or self-paced l...,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Formal documentation provided by the owner of ...,...,6-10 times a week,3-5 times a week,30-60 minutes a day,30-60 minutes a day,DevOps function;Microservices;Automated testin...,"Information Services, IT, Software Development...",Appropriate in length,Easy,250000.0,2023
3,4,I agree,I am a developer by profession,25-34 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",Colleague;Friend or family member;Other online...,Formal documentation provided by the owner of ...,...,10+ times a week,1-2 times a week,15-30 minutes a day,30-60 minutes a day,Automated testing;Continuous integration (CI) ...,,Appropriate in length,Easy,156000.0,2023
4,5,I agree,I am a developer by profession,25-34 years old,"Employed, full-time;Independent contractor, fr...",Remote,Hobby;Contribute to open-source projects;Profe...,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Online Courses or Certi...,Formal documentation provided by the owner of ...,...,1-2 times a week,3-5 times a week,60-120 minutes a day,30-60 minutes a day,Microservices;Automated testing;Observability ...,Other,Appropriate in length,Neither easy nor difficult,23456.0,2023


DataFrame for year 2024 after dropping unnamed columns:


Unnamed: 0,ResponseId,MainBranch,Age,Employment,RemoteWork,Check,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,...,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,SurveyLength,SurveyEase,ConvertedCompYearly,JobSat,year
0,1,I am a developer by profession,Under 18 years old,"Employed, full-time",Remote,Apples,Hobby,Primary/elementary school,Books / Physical media,,...,,,,,,,,,,2024
1,2,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,...,0.0,0.0,0.0,0.0,0.0,,,,,2024
2,3,I am a developer by profession,45-54 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,...,,,,,,Appropriate in length,Easy,,,2024
3,4,I am learning to code,18-24 years old,"Student, full-time",,Apples,,Some college/university study without earning ...,"Other online resources (e.g., videos, blogs, f...",Stack Overflow;How-to videos;Interactive tutorial,...,,,,,,Too long,Easy,,,2024
4,5,I am a developer by profession,18-24 years old,"Student, full-time",,Apples,,"Secondary school (e.g. American high school, G...","Other online resources (e.g., videos, blogs, f...",Technical documentation;Blogs;Written Tutorial...,...,,,,,,Too short,Easy,,,2024


DataFrame for year 2025 after dropping unnamed columns:


Unnamed: 0,"ï»¿""ResponseId""",MainBranch,Age,EdLevel,Employment,EmploymentAddl,WorkExp,LearnCodeChoose,LearnCode,LearnCodeAI,...,AIAgentOrchWrite,AIAgentObserveSecure,AIAgentObsWrite,AIAgentExternal,AIAgentExtWrite,AIHuman,AIOpen,ConvertedCompYearly,JobSat,year
0,1,I am a developer by profession,25-34 years old,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",Employed,"Caring for dependents (children, elderly, etc.)",8.0,"Yes, I am not new to coding but am learning ne...",Online Courses or Certification (includes all ...,"Yes, I learned how to use AI-enabled tools for...",...,,,,ChatGPT,,When I donât trust AIâs answers,"Troubleshooting, profiling, debugging",61256.0,10.0,2025
1,2,I am a developer by profession,25-34 years old,"Associate degree (A.A., A.S., etc.)",Employed,,2.0,"Yes, I am not new to coding but am learning ne...",Online Courses or Certification (includes all ...,"Yes, I learned how to use AI-enabled tools for...",...,,,,,,When I donât trust AIâs answers;When I wan...,All skills. AI is a flop.,104413.0,9.0,2025
2,3,I am a developer by profession,35-44 years old,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",None of the above,10.0,"Yes, I am not new to coding but am learning ne...",Online Courses or Certification (includes all ...,"Yes, I learned how to use AI-enabled tools for...",...,,,,ChatGPT;Claude Code;GitHub Copilot;Google Gemini,,When I donât trust AIâs answers;When I wan...,"Understand how things actually work, problem s...",53061.0,8.0,2025
3,4,I am a developer by profession,35-44 years old,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",Employed,None of the above,4.0,"Yes, I am not new to coding but am learning ne...","Other online resources (e.g. standard search, ...","Yes, I learned how to use AI-enabled tools for...",...,,,,ChatGPT;Claude Code,,When I donât trust AIâs answers;When I wan...,,36197.0,6.0,2025
4,5,I am a developer by profession,35-44 years old,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...","Caring for dependents (children, elderly, etc.)",21.0,"No, I am not new to coding and did not learn n...",,"Yes, I learned how to use AI-enabled tools for...",...,,,,,,When I donât trust AIâs answers,"critical thinking, the skill to define the tas...",60000.0,7.0,2025


Dropping columns: ['Unnamed: 0']
DataFrame for year 2016 after dropping unnamed columns:


Unnamed: 0,collector,country,un_subregion,so_region,age_range,age_midpoint,gender,self_identification,occupation,occupation_group,...,important_newtech,important_buildnew,important_buildexisting,important_promotion,important_companymission,important_wfh,important_ownoffice,developer_challenges,why_stack_overflow,year
0,Facebook,Afghanistan,Southern Asia,Central Asia,20-24,22.0,Male,Programmer,,,...,,,,,,,,,,2016
1,Facebook,Afghanistan,Southern Asia,Central Asia,30-34,32.0,Male,Developer; Engineer; Programmer,Mobile developer - iOS,"Mobile Dev (Android, iOS, WP & Multi-Platform)",...,This is very important,This is very important,I don't care about this,This is somewhat important,This is somewhat important,I don't care about this,I don't care about this,Unrealistic expectations; Interacting with non...,To get help for my job; Because I can't do my ...,2016
2,Facebook,Afghanistan,Southern Asia,Central Asia,,,,,,,...,,,,,,,,,,2016
3,Facebook,Afghanistan,Southern Asia,Central Asia,,,Female,Engineer,DevOps,DevOps,...,This is somewhat important,This is very important,This is very important,I don't care about this,I don't care about this,This is very important,This is somewhat important,Corporate policies; Interacting with non-techn...,To get help for my job; Beacause I love to learn,2016
4,Facebook,Afghanistan,Southern Asia,Central Asia,> 60,65.0,Prefer not to disclose,Developer; Engineer; Programmer; Sr. Developer...,,,...,,,,,,,,,,2016


In [7]:
all_dfs = list(dfs.values()) + [df_2016]
df = pd.concat(all_dfs, ignore_index=True)
display(df.head())

Unnamed: 0,What Country or Region do you live in?,Which US State or Territory do you live in?,How old are you?,How many years of IT/Programming experience do you have?,How would you best describe the industry you work in?,Which best describes the size of your company?,Which of the following best describes your occupation?,How likely is it that a recommendation you make will be acted upon?,What is your involvement in purchasing? You can choose more than 1.,What types of purchases are you involved in?,...,AIAgentKnowledge,AIAgentKnowWrite,AIAgentOrchestration,AIAgentOrchWrite,AIAgentObserveSecure,AIAgentObsWrite,AIAgentExternal,AIAgentExtWrite,AIHuman,AIOpen
0,Response,Response,Response,Response,Response,Response,Response,Response,Influencer,Hardware,...,,,,,,,,,,
1,Africa,,< 20,<2,Consulting,Start Up (1-25),Web Application Developer,Not in a million years,,,...,,,,,,,,,,
2,Other Europe,,25-29,41310,Software Products,Mature Small Business (25-100),Server Programmer,It's been known to happen,,,...,,,,,,,,,,
3,India,,25-29,41435,Software Products,Mid Sized (100-999),Server Programmer,Unless it's stoopid it gets done,,,...,,,,,,,,,,
4,Germany,,< 20,41310,Foundation / Non-Profit,Student,Student,It's been known to happen,,,...,,,,,,,,,,


In [8]:
df.describe(include='all').T

  sqr = _ensure_numeric((avg - values) ** 2)


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
What Country or Region do you live in?,18800,21,United States of America,6003,,,,,,,
Which US State or Territory do you live in?,8075,54,California,1135,,,,,,,
How old are you?,25477,9,25-29,7373,,,,,,,
How many years of IT/Programming experience do you have?,25479,11,11,7800,,,,,,,
How would you best describe the industry you work in?,2730,13,Software Products,814,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
AIAgentObsWrite,264,195,no,13,,,,,,,
AIAgentExternal,8326,1310,ChatGPT;GitHub Copilot,669,,,,,,,
AIAgentExtWrite,858,421,Cursor,105,,,,,,,
AIHuman,29167,574,When I donât trust AIâs answers;When I wan...,3046,,,,,,,


In [9]:
# Identify columns in each DataFrame for all available years
all_years = list(dfs.keys()) + [2016] # Include 2016 in the list of years
cols_by_year = {year: set(dfs[year].columns) if year != 2016 else set(df_2016.columns) for year in all_years}

# Get all unique columns across all years
all_unique_cols = set().union(*cols_by_year.values())

# Create a dictionary to store column presence in each year
column_presence = {year: [col in cols_by_year[year] for col in all_unique_cols] for year in all_years}

# Create a pandas DataFrame (matrix) from the dictionary
column_matrix = pd.DataFrame(column_presence, index=list(all_unique_cols))

# Add a column to count the number of dataframes the column is in
column_matrix['PresenceCount'] = column_matrix.sum(axis=1)

# Sort the matrix by 'PresenceCount' in descending order
column_matrix_sorted = column_matrix.sort_values(by='PresenceCount', ascending=False)

# Drop the 'PresenceCount' column for the final display
column_matrix_sorted = column_matrix_sorted.drop(columns=['PresenceCount'])

# Display the sorted matrix
display(column_matrix_sorted)

Unnamed: 0,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025
year,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
Country,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True
Employment,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True
DevType,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True
Age,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Frustration,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
LanguagesHaveEntry,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
ExCoderReturn,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
WorkLoc,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False


In [None]:
# Check for missing values in 'Country' column
print("Missing values in 'Country' column:")
print(df['Country'].isnull().sum())

# Drop rows with missing 'Country' values
df_country = df.dropna(subset=['Country']).copy()

# Group by 'Country' and 'Year' and count occurrences
country_counts = df_country.groupby(['Country', 'Year']).size().reset_index(name='Count')

# Display the counts
display(country_counts.head())

# Pivot the table for easier plotting
country_pivot = country_counts.pivot(index='Country', columns='Year', values='Count').fillna(0)

# Display the pivoted table
display(country_pivot.head())

# Optional: Visualize the top N countries over the years
top_n = 10
top_countries = country_pivot.sum(axis=1).nlargest(top_n).index
country_pivot_top = country_pivot.loc[top_countries]

# Reset index to make 'Country' a column and melt for plotting
country_pivot_top_reset = country_pivot_top.reset_index()
country_melted = country_pivot_top_reset.melt(id_vars='Country', var_name='Year', value_name='Count')

plt.figure(figsize=(15, 7))
sns.lineplot(data=country_melted, x='Year', y='Count', hue='Country')

plt.title(f'Top {top_n} Stack Overflow Users by Country and Year')
plt.xlabel('Year')
plt.ylabel('Number of Users')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

Missing values in 'Country' column:
188461
