# Exploratory Data Analysis (EDA) on Developer Demographics

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from collections import Counter
import pycountry
import plotly.express as px
import warnings; 
warnings.simplefilter('ignore')

In [2]:
df = pd.read_csv('dataset/survey_results_public.csv')

In [3]:
df.head()

Unnamed: 0,ResponseId,MainBranch,Age,Employment,RemoteWork,Check,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,...,JobSatPoints_6,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,SurveyLength,SurveyEase,ConvertedCompYearly,JobSat
0,1,I am a developer by profession,Under 18 years old,"Employed, full-time",Remote,Apples,Hobby,Primary/elementary school,Books / Physical media,,...,,,,,,,,,,
1,2,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
2,3,I am a developer by profession,45-54 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,...,,,,,,,Appropriate in length,Easy,,
3,4,I am learning to code,18-24 years old,"Student, full-time",,Apples,,Some college/university study without earning ...,"Other online resources (e.g., videos, blogs, f...",Stack Overflow;How-to videos;Interactive tutorial,...,,,,,,,Too long,Easy,,
4,5,I am a developer by profession,18-24 years old,"Student, full-time",,Apples,,"Secondary school (e.g. American high school, G...","Other online resources (e.g., videos, blogs, f...",Technical documentation;Blogs;Written Tutorial...,...,,,,,,,Too short,Easy,,


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65437 entries, 0 to 65436
Columns: 114 entries, ResponseId to JobSat
dtypes: float64(13), int64(1), object(100)
memory usage: 56.9+ MB


In [5]:
df.describe()

Unnamed: 0,ResponseId,CompTotal,WorkExp,JobSatPoints_1,JobSatPoints_4,JobSatPoints_5,JobSatPoints_6,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,ConvertedCompYearly,JobSat
count,65437.0,33740.0,29658.0,29324.0,29393.0,29411.0,29450.0,29448.0,29456.0,29456.0,29450.0,29445.0,23435.0,29126.0
mean,32719.0,2.963841e+145,11.466957,18.581094,7.52214,10.060857,24.343232,22.96522,20.278165,16.169432,10.955713,9.953948,86155.29,6.935041
std,18890.179119,5.444117e+147,9.168709,25.966221,18.422661,21.833836,27.08936,27.01774,26.10811,24.845032,22.906263,21.775652,186757.0,2.088259
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,16360.0,60000.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,32712.0,6.0
50%,32719.0,110000.0,9.0,10.0,0.0,0.0,20.0,15.0,10.0,5.0,0.0,0.0,65000.0,7.0
75%,49078.0,250000.0,16.0,22.0,5.0,10.0,30.0,30.0,25.0,20.0,10.0,10.0,107971.5,8.0
max,65437.0,1e+150,50.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,16256600.0,10.0


In [6]:
df.columns.to_list()

['ResponseId',
 'MainBranch',
 'Age',
 'Employment',
 'RemoteWork',
 'Check',
 'CodingActivities',
 'EdLevel',
 'LearnCode',
 'LearnCodeOnline',
 'TechDoc',
 'YearsCode',
 'YearsCodePro',
 'DevType',
 'OrgSize',
 'PurchaseInfluence',
 'BuyNewTool',
 'BuildvsBuy',
 'TechEndorse',
 'Country',
 'Currency',
 'CompTotal',
 'LanguageHaveWorkedWith',
 'LanguageWantToWorkWith',
 'LanguageAdmired',
 'DatabaseHaveWorkedWith',
 'DatabaseWantToWorkWith',
 'DatabaseAdmired',
 'PlatformHaveWorkedWith',
 'PlatformWantToWorkWith',
 'PlatformAdmired',
 'WebframeHaveWorkedWith',
 'WebframeWantToWorkWith',
 'WebframeAdmired',
 'EmbeddedHaveWorkedWith',
 'EmbeddedWantToWorkWith',
 'EmbeddedAdmired',
 'MiscTechHaveWorkedWith',
 'MiscTechWantToWorkWith',
 'MiscTechAdmired',
 'ToolsTechHaveWorkedWith',
 'ToolsTechWantToWorkWith',
 'ToolsTechAdmired',
 'NEWCollabToolsHaveWorkedWith',
 'NEWCollabToolsWantToWorkWith',
 'NEWCollabToolsAdmired',
 'OpSysPersonal use',
 'OpSysProfessional use',
 'OfficeStackAsyncHa

In [7]:
df.isnull().sum()

ResponseId                 0
MainBranch                 0
Age                        0
Employment                 0
RemoteWork             10631
                       ...  
JobSatPoints_11        35992
SurveyLength            9255
SurveyEase              9199
ConvertedCompYearly    42002
JobSat                 36311
Length: 114, dtype: int64

Columns necessary for EDA

Key Columns for the Project:

	1.	Demographic and Personal Information:
	•	Age: To analyze how age impacts satisfaction levels.
	•	EdLevel: To study the relationship between education level and satisfaction.
	•	YearsCode: To assess how years of coding experience (all experience) relate to satisfaction.
	•	YearsCodePro: To examine how professional coding experience specifically relates to satisfaction.
	•	WorkExp: To explore the total years of work experience.
	•	Country: To analyze satisfaction trends across different countries (optional if you want to compare across regions).
	2.	Job Satisfaction and Career Satisfaction:
	•	JobSat: Measures job satisfaction, a key target variable for analysis.
	•	CareerSat: Career satisfaction is implied from JobSat in some datasets or may need to be derived (but this is the core factor to analyze if available).
	3.	Employment Information:
	•	MainBranch: To analyze satisfaction based on current job focus (e.g., developer, data scientist, etc.).
	•	Employment: To analyze whether full-time, part-time, or contract employment impacts satisfaction.
	•	RemoteWork: To explore the relationship between remote work and satisfaction.
	4.	Compensation Information:
	•	CompTotal: The total compensation (if relevant to study how salary affects job/career satisfaction).
	•	ConvertedCompYearly: The yearly compensation converted into a consistent currency (more useful for salary analysis).
	5.	Job Role and Type:
	•	DevType: To see how different development roles (e.g., software engineer, web developer, data scientist) affect satisfaction.

Optional/Additional Columns (for Advanced Insights):

	1.	Technology Used:
	•	LanguageHaveWorkedWith, LanguageWantToWorkWith: Insights into whether familiarity or desire to use certain technologies impact satisfaction.
	•	PlatformHaveWorkedWith, PlatformWantToWorkWith: Similar reasoning as above.
	2.	Organizational Information:
	•	OrgSize: To analyze how company size affects satisfaction (e.g., large enterprises vs. startups).
	•	PurchaseInfluence, BuyNewTool: Optional, if you want to explore how having influence over purchasing decisions at work impacts job satisfaction.
	3.	Time-related Information:
	•	TimeSearching, TimeAnswering: Could be useful for analyzing how time spent on tasks like searching for answers (e.g., coding help) correlates with job frustration and satisfaction.
	4.	AI-related columns (optional):
	•	You may analyze the impact of AI usage in work-related tasks on satisfaction if AI tools are prominent in the dataset.

## Data Cleaning

## 1. Demographic and Personal Information

### Age Distribution

In [8]:
df['Age'].unique()

array(['Under 18 years old', '35-44 years old', '45-54 years old',
       '18-24 years old', '25-34 years old', '55-64 years old',
       'Prefer not to say', '65 years or older'], dtype=object)

In [9]:
df['Age'].isnull()

0        False
1        False
2        False
3        False
4        False
         ...  
65432    False
65433    False
65434    False
65435    False
65436    False
Name: Age, Length: 65437, dtype: bool

No null values in the column.

### Education Level

In [10]:
df['EdLevel'].unique()

array(['Primary/elementary school',
       'Bachelor’s degree (B.A., B.S., B.Eng., etc.)',
       'Master’s degree (M.A., M.S., M.Eng., MBA, etc.)',
       'Some college/university study without earning a degree',
       'Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)',
       'Professional degree (JD, MD, Ph.D, Ed.D, etc.)',
       'Associate degree (A.A., A.S., etc.)', 'Something else', nan],
      dtype=object)

In [11]:
df['EdLevel'].isnull().sum()

4653

In [12]:
# Calculate the mode (most common value) of the 'EdLevel' column
most_common_edlevel = df['EdLevel'].mode()[0]

# Replace NaN values with the mode
df.loc[:, 'EdLevel'] = df['EdLevel'].fillna(most_common_edlevel)

In [13]:
df['EdLevel'].isnull().sum()

0

### Years of Coding (Professional Experience)

In [14]:
df['YearsCodePro'].isnull().sum()

13827

In [15]:
df["YearsCodePro"].value_counts()

YearsCodePro
2                     4168
3                     4093
5                     3526
10                    3251
4                     3215
Less than 1 year      2856
6                     2843
1                     2639
8                     2549
7                     2517
12                    1777
15                    1635
20                    1549
9                     1493
11                    1312
13                    1127
14                    1082
25                     998
16                     946
18                     867
17                     814
30                     689
24                     632
19                     516
22                     492
23                     448
26                     426
21                     380
27                     380
28                     342
35                     285
29                     196
40                     194
32                     194
34                     169
38                     134
33             

In [16]:
df['YearsCodePro'].fillna(method='ffill', inplace=True)

In [17]:
df['YearsCodePro'].isnull().sum()

1

In [18]:
# Drop rows where 'YearsCodePro' is NaN
df.dropna(subset=['YearsCodePro'], inplace=True)

In [19]:
df['YearsCodePro'].isnull().sum()

0

### Years of Coding (Education)

In [20]:
df['YearsCode'].isnull().sum()

5567

In [21]:
df["YearsCode"].value_counts()

YearsCode
10                    4561
5                     3723
6                     3496
8                     3449
7                     3333
4                     3290
15                    2813
20                    2636
12                    2559
3                     2518
9                     2251
14                    1822
25                    1671
2                     1642
11                    1636
13                    1477
30                    1461
16                    1401
18                    1228
17                    1038
40                     993
24                     870
22                     842
35                     735
1                      712
23                     634
26                     630
Less than 1 year       569
19                     561
21                     522
28                     512
27                     504
32                     328
34                     293
42                     289
38                     285
29                

In [22]:
df['YearsCode'].fillna(method='ffill', inplace=True)

In [23]:
df['YearsCode'].isnull().sum()

0

### Work Experience

In [24]:
df['WorkExp'].isnull().sum()


35778

In [25]:
df['WorkExp'].fillna(0, inplace=True)

In [26]:
df['WorkExp'].value_counts()

WorkExp
0.0     35970
3.0      2144
5.0      2050
10.0     2029
2.0      1885
4.0      1767
6.0      1600
7.0      1581
8.0      1554
1.0      1461
15.0     1250
12.0     1249
20.0     1074
9.0       982
11.0      823
25.0      794
13.0      740
14.0      655
16.0      600
17.0      596
18.0      587
30.0      461
24.0      382
19.0      330
22.0      323
23.0      269
26.0      254
21.0      251
35.0      217
27.0      217
28.0      210
40.0      139
29.0      117
32.0      101
33.0       90
36.0       89
34.0       84
31.0       83
37.0       60
38.0       55
50.0       53
42.0       47
45.0       46
39.0       37
41.0       36
43.0       34
44.0       32
46.0       12
48.0        7
47.0        5
49.0        4
Name: count, dtype: int64

### Country

In [27]:
df['Country'].isnull().sum()

6507

In [28]:
df['Country'].value_counts()

Country
United States of America                                11094
Germany                                                  4947
India                                                    4231
United Kingdom of Great Britain and Northern Ireland     3224
Ukraine                                                  2672
                                                        ...  
Central African Republic                                    1
Equatorial Guinea                                           1
Niger                                                       1
Guinea                                                      1
Solomon Islands                                             1
Name: count, Length: 185, dtype: int64

## 2. Job Satisfaction

### Job Satisfaction

In [29]:
df['JobSat'].value_counts()

JobSat
8.0     7509
7.0     6379
6.0     3751
9.0     3626
10.0    2251
5.0     1956
3.0     1165
4.0     1130
2.0      772
0.0      311
1.0      276
Name: count, dtype: int64

In [30]:
df['JobSat'].isnull().sum()

36310

Filling the null values with average job satisfaction.

In [31]:
median_job_sat = int(df['JobSat'].median())
median_job_sat

7

In [32]:
# Fill the NaN values in 'JobSat' with the calculated mean
df['JobSat'].fillna(median_job_sat, inplace=True)

In [33]:
df['JobSat'].value_counts()

JobSat
7.0     42689
8.0      7509
6.0      3751
9.0      3626
10.0     2251
5.0      1956
3.0      1165
4.0      1130
2.0       772
0.0       311
1.0       276
Name: count, dtype: int64

In [34]:
df['JobSat'].isnull().sum()

0

## 3. Employment Information

### Employment Focus

In [35]:
df['MainBranch'].value_counts()

MainBranch
I am a developer by profession                                                           50206
I am not primarily a developer, but I write code sometimes as part of my work/studies     6511
I am learning to code                                                                     3875
I code primarily as a hobby                                                               3334
I used to be a developer by profession, but no longer am                                  1510
Name: count, dtype: int64

In [36]:
df['MainBranch'].isnull().sum()

0

### Type of Employment

In [37]:
df['Employment'].value_counts()

Employment
Employed, full-time                                                                                                                                  39040
Independent contractor, freelancer, or self-employed                                                                                                  4846
Student, full-time                                                                                                                                    4709
Employed, full-time;Independent contractor, freelancer, or self-employed                                                                              3557
Not employed, but looking for work                                                                                                                    2341
                                                                                                                                                     ...  
Employed, full-time;Student, full-time;Independent contract

In [38]:
df['Employment'].isnull().sum()

0

### Remote Work

In [39]:
df['RemoteWork'].value_counts()

RemoteWork
Hybrid (some remote, some in-person)    23015
Remote                                  20830
In-person                               10960
Name: count, dtype: int64

In [40]:
df['RemoteWork'].isnull().sum()

10631

In [41]:
# Fill missing values with a new category 'Unknown'
df['RemoteWork'].fillna('Unknown', inplace=True)

In [42]:
df['RemoteWork'].value_counts()

RemoteWork
Hybrid (some remote, some in-person)    23015
Remote                                  20830
In-person                               10960
Unknown                                 10631
Name: count, dtype: int64

## 4. Compensation Information

### Total Compensation

In [43]:
df['CompTotal'].value_counts()

CompTotal
100000.0    939
60000.0     839
120000.0    793
80000.0     728
50000.0     705
           ... 
59800.0       1
28805.0       1
377532.0      1
518400.0      1
77700.0       1
Name: count, Length: 3337, dtype: int64

In [44]:
df['CompTotal'].isnull().sum()

31696

In [45]:
df['CompTotal'].fillna(0, inplace=True)

In [46]:
df['CompTotal'].isnull().sum()

0

### Converted Yearly Compensation

In [47]:
df['Currency'].isnull().sum()

18752

In [48]:
df.dropna(subset=['Currency'], inplace=True)

In [49]:
df.shape

(46684, 114)

In [50]:
df['Currency'].unique()

array(['PKR\tPakistani rupee', 'EUR European Euro',
       'USD\tUnited States dollar', 'BRL\tBrazilian real',
       'GBP\tPound sterling', 'RON\tRomanian leu', 'INR\tIndian rupee',
       'CHF\tSwiss franc', 'TRY\tTurkish lira', 'RUB\tRussian ruble',
       'ZAR\tSouth African rand', 'CZK\tCzech koruna',
       'CAD\tCanadian dollar', 'IRR\tIranian rial', 'MXN\tMexican peso',
       'UAH\tUkrainian hryvnia', 'DOP\tDominican peso',
       'KMF\tComorian franc', 'RSD\tSerbian dinar', 'PEN\tPeruvian sol',
       'MAD\tMoroccan dirham', 'GEL\tGeorgian lari', 'PLN\tPolish zloty',
       'SAR\tSaudi Arabian riyal', 'SEK\tSwedish krona',
       'BGN\tBulgarian lev', 'KZT\tKazakhstani tenge',
       'SGD\tSingapore dollar', 'JOD\tJordanian dinar',
       'JPY\tJapanese yen', 'NOK\tNorwegian krone',
       'ILS\tIsraeli new shekel', 'DKK\tDanish krone', 'THB\tThai baht',
       'RWF\tRwandan franc', 'HUF\tHungarian forint',
       'BDT\tBangladeshi taka', 'IDR\tIndonesian rupiah',
       'BAM

In [51]:
df['Currency'] = df['Currency'].str[:3]

In [52]:
df['Currency'].unique()

array(['PKR', 'EUR', 'USD', 'BRL', 'GBP', 'RON', 'INR', 'CHF', 'TRY',
       'RUB', 'ZAR', 'CZK', 'CAD', 'IRR', 'MXN', 'UAH', 'DOP', 'KMF',
       'RSD', 'PEN', 'MAD', 'GEL', 'PLN', 'SAR', 'SEK', 'BGN', 'KZT',
       'SGD', 'JOD', 'JPY', 'NOK', 'ILS', 'DKK', 'THB', 'RWF', 'HUF',
       'BDT', 'IDR', 'BAM', 'PHP', 'XOF', 'DZD', 'TND', 'MYR', 'BHD',
       'ARS', 'NIO', 'AFN', 'UYU', 'BYN', 'COP', 'ALL', 'AUD', 'UZS',
       'NZD', 'MVR', 'GHS', 'AED', 'NGN', 'FJD', 'GTQ', 'UGX', 'CRC',
       'MUR', 'KES', 'EGP', 'TWD', 'AMD', 'KRW', 'CLP', 'ISK', 'HNL',
       'HKD', 'CNY', 'VND', 'BSD', 'LKR', 'BTN', 'MNT', 'KHR', 'NPR',
       'BOB', 'ETB', 'AOA', 'MKD', 'SYP', 'NAD', 'ANG', 'TJS', 'BIF',
       'JMD', 'TTD', 'SLL', 'SRD', 'GYD', 'KGS', 'ZMW', 'MDL', 'OMR',
       'CUP', 'XPF', 'KYD', 'TZS', 'KWD', 'TMT', 'QAR', 'YER', 'MWK',
       'IQD', 'IMP', 'KPW', 'XAF', 'MGA', 'PYG', 'ERN', 'MMK', 'SHP',
       'MZN', 'non', 'AZN', 'LYD', 'MOP', 'LBP', 'BND', 'VES', 'SOS',
       'CDF', 'XDR',

In [53]:
currency_to_usd = {
    'PKR': 0.0035,  # Pakistani Rupee
    'EUR': 1.05,    # European Euro
    'USD': 1.00,    # United States Dollar
    'BRL': 0.19,    # Brazilian Real
    'GBP': 1.22,    # Pound Sterling
    'RON': 0.21,    # Romanian Leu
    'INR': 0.012,   # Indian Rupee
    'CHF': 1.09,    # Swiss Franc
    'TRY': 0.036,   # Turkish Lira
    'RUB': 0.01,    # Russian Ruble
    'ZAR': 0.052,   # South African Rand
    'CZK': 0.043,   # Czech Koruna
    'CAD': 0.73,    # Canadian Dollar
    'IRR': 0.000024,# Iranian Rial
    'MXN': 0.056,   # Mexican Peso
    'UAH': 0.027,   # Ukrainian Hryvnia
    'DOP': 0.017,   # Dominican Peso
    'KMF': 0.0022,  # Comorian Franc
    'RSD': 0.0091,  # Serbian Dinar
    'PEN': 0.27,    # Peruvian Sol
    'MAD': 0.097,   # Moroccan Dirham
    'GEL': 0.37,    # Georgian Lari
    'PLN': 0.24,    # Polish Zloty
    'SAR': 0.27,    # Saudi Arabian Riyal
    'SEK': 0.089,   # Swedish Krona
    'BGN': 0.53,    # Bulgarian Lev
    'KZT': 0.0021,  # Kazakhstani Tenge
    'SGD': 0.73,    # Singapore Dollar
    'JOD': 1.41,    # Jordanian Dinar
    'JPY': 0.0067,  # Japanese Yen
    'NOK': 0.091,   # Norwegian Krone
    'ILS': 0.25,    # Israeli Shekel
    'DKK': 0.14,    # Danish Krone
    'THB': 0.027,   # Thai Baht
    'RWF': 0.00083, # Rwandan Franc
    'HUF': 0.0027,  # Hungarian Forint
    'BDT': 0.0091,  # Bangladeshi Taka
    'IDR': 0.000065,# Indonesian Rupiah
    'BAM': 0.53,    # Bosnia and Herzegovina Convertible Mark
    'PHP': 0.018,   # Philippine Peso
    'XOF': 0.0016,  # West African CFA Franc
    'DZD': 0.0074,  # Algerian Dinar
    'TND': 0.32,    # Tunisian Dinar
    'MYR': 0.21,    # Malaysian Ringgit
    'BHD': 2.65,    # Bahraini Dinar
    'ARS': 0.0027,  # Argentine Peso
    'NIO': 0.027,   # Nicaraguan Cordoba
    'AFN': 0.012,   # Afghan Afghani
    'UYU': 0.026,   # Uruguayan Peso
    'BYN': 0.37,    # Belarusian Ruble
    'COP': 0.00024, # Colombian Peso
    'ALL': 0.0095,  # Albanian Lek
    'AUD': 0.64,    # Australian Dollar
    'UZS': 0.000082,# Uzbekistani Som
    'NZD': 0.58,    # New Zealand Dollar
    'MVR': 0.065,   # Maldivian Rufiyaa
    'GHS': 0.086,   # Ghanaian Cedi
    'AED': 0.27,    # United Arab Emirates Dirham
    'NGN': 0.0013,  # Nigerian Naira
    'FJD': 0.44,    # Fijian Dollar
    'GTQ': 0.13,    # Guatemalan Quetzal
    'UGX': 0.00026, # Ugandan Shilling
    'CRC': 0.0018,  # Costa Rican Colon
    'MUR': 0.022,   # Mauritian Rupee
    'KES': 0.0067,  # Kenyan Shilling
    'EGP': 0.032,   # Egyptian Pound
    'TWD': 0.031,   # New Taiwan Dollar
    'AMD': 0.0025,  # Armenian Dram
    'KRW': 0.00074, # South Korean Won
    'CLP': 0.0011,  # Chilean Peso
    'ISK': 0.0072,  # Icelandic Krona
    'HNL': 0.041,   # Honduran Lempira
    'HKD': 0.13,    # Hong Kong Dollar
    'CNY': 0.14,    # Chinese Yuan Renminbi
    'VND': 0.000041,# Vietnamese Dong
    'BSD': 1.00,    # Bahamian Dollar
    'LKR': 0.0031,  # Sri Lankan Rupee
    'BTN': 0.012,   # Bhutanese Ngultrum
    'MNT': 0.00029, # Mongolian Tugrik
    'KHR': 0.00024, # Cambodian Riel
    'NPR': 0.0076,  # Nepalese Rupee
    'BOB': 0.14,    # Bolivian Boliviano
    'ETB': 0.018,   # Ethiopian Birr
    'AOA': 0.0012,  # Angolan Kwanza
    'MKD': 0.017,   # Macedonian Denar
    'SYP': 0.0004,  # Syrian Pound
    'NAD': 0.052,   # Namibian Dollar
    'ANG': 0.56,    # Netherlands Antillean Guilder
    'TJS': 0.091,   # Tajikistani Somoni
    'BIF': 0.00035, # Burundi Franc
    'JMD': 0.0065,  # Jamaican Dollar
    'TTD': 0.15,    # Trinidad and Tobago Dollar
    'SLL': 0.00005, # Sierra Leonean Leone
    'SRD': 0.028,   # Surinamese Dollar
    'GYD': 0.0048,  # Guyanese Dollar
    'KGS': 0.011,   # Kyrgyzstani Som
    'ZMW': 0.047,   # Zambian Kwacha
    'MDL': 0.055,   # Moldovan Leu
    'OMR': 2.60,    # Omani Rial
    'CUP': 0.037,   # Cuban Peso
    'XPF': 0.0090,  # CFP Franc
    'KYD': 1.20,    # Cayman Islands Dollar
    'TZS': 0.00040, # Tanzanian Shilling
    'KWD': 3.24,    # Kuwaiti Dinar
    'TMT': 0.29,    # Turkmen Manat
    'QAR': 0.27,    # Qatari Riyal
    'YER': 0.0040,  # Yemeni Rial
    'MWK': 0.00087, # Malawian Kwacha
    'IQD': 0.00077, # Iraqi Dinar
    'IMP': 1.22,    # Manx Pound
    'KPW': 0.0011,  # North Korean Won
    'XAF': 0.0016,  # Central African CFA Franc
    'MGA': 0.00022, # Malagasy Ariary
    'PYG': 0.00014, # Paraguayan Guarani
    'ERN': 0.066,   # Eritrean Nakfa
    'MMK': 0.00048, # Myanmar Kyat
    'SHP': 1.22,    # Saint Helena Pound
    'MZN': 0.016,   # Mozambican Metical
    'AZN': 0.59,    # Azerbaijani Manat
    'LYD': 0.20,    # Libyan
    'MOP': 0.12,    # Macanese Pataca
    'LBP': 0.000065,# Lebanese Pound
    'BND': 0.73,    # Brunei Dollar
    'VES': 0.0000003,# Venezuelan Bolivar
    'SOS': 0.0018,  # Somali Shilling
    'CDF': 0.00041, # Congolese Franc
    'XDR': 1.38,    # SDR (Special Drawing Right)
    'MRU': 0.027,   # Mauritanian Ouguiya
    'WST': 0.36,    # Samoan Tala
    'SDG': 0.0017,  # Sudanese Pound
    'XCD': 0.37,    # East Caribbean Dollar
    'FKP': 1.22,    # Falkland Islands Pound
    'BWP': 0.074,   # Botswana Pula
    'GGP': 1.22,    # Guernsey Pound
    'CVE': 0.0094,  # Cape Verdean Escudo
    'GIP': 1.22,    # Gibraltar Pound
    'SZL': 0.052,   # Swazi Lilangeni
    'AWG': 0.56,    # Aruban Florin
    'BBD': 0.50,    # Barbadian Dollar
    'BMD': 1.00     # Bermudian Dollar
}

Converting all the compensations in USD for uniform data analysis

In [54]:
# Step 2: Function to convert compensation to USD
def convert_to_usd(row):
    currency = row['Currency'].strip()  # Ensure no whitespace
    conversion_rate = currency_to_usd.get(currency, 0)  # Default to 0 if currency not found
    return row['CompTotal'] * conversion_rate

# Step 3: Apply the conversion function
df['CompTotalUSD'] = df.apply(convert_to_usd, axis=1)

In [55]:
df['CompTotalUSD'].isnull().sum()

0

In [56]:
# Ensure CompTotalUSD is in float for calculations
df['CompTotalUSD'] = df['CompTotalUSD'].astype(float)

# Calculate Q1 (25th percentile) and Q3 (75th percentile)
Q1 = df['CompTotalUSD'].quantile(0.25)
Q3 = df['CompTotalUSD'].quantile(0.75)
IQR = Q3 - Q1

# Determine the bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Modify the existing DataFrame by filtering out outliers in place
df = df[(df['CompTotalUSD'] >= lower_bound) & (df['CompTotalUSD'] <= upper_bound)]

# Convert the 'CompTotalUSD' column to integers
df['CompTotalUSD'] = df['CompTotalUSD'].astype(int)

## 5. Job Role and Type

### Job Description

In [57]:
df['DevType'].isnull().sum()

170

In [58]:
df['DevType'].unique()

array(['Data scientist or machine learning specialist',
       'Developer, mobile', 'Academic researcher',
       'Developer, full-stack', 'Developer, back-end',
       'Engineering manager', 'Student', 'Other (please specify):',
       'Developer, QA or test', 'Developer, front-end',
       'Developer, embedded applications or devices', 'Developer, AI',
       'Product manager', 'Senior Executive (C-Suite, VP, etc.)', nan,
       'System administrator', 'Data or business analyst',
       'Developer, desktop or enterprise applications',
       'Cloud infrastructure engineer', 'Research & Development role',
       'DevOps specialist', 'Security professional', 'Educator',
       'Project manager', 'Developer, game or graphics', 'Blockchain',
       'Developer Experience', 'Engineer, site reliability', 'Scientist',
       'Designer', 'Hardware Engineer', 'Database administrator',
       'Data engineer', 'Developer Advocate',
       'Marketing or sales professional'], dtype=object)

In [59]:
df.dropna(subset=['DevType'], inplace=True)

In [60]:
df.shape

(45065, 115)

## 6. Technology Used

### Programming Languages Worked With

In [61]:
df['LanguageHaveWorkedWith'].isnull().sum()

637

In [62]:
df['LanguageHaveWorkedWith'].unique()

array(['Assembly;Bash/Shell (all shells);C;C++;HTML/CSS;Java;JavaScript;Python;R;SQL;TypeScript',
       'Bash/Shell (all shells);Go;Java;JavaScript;Kotlin;Objective-C;Python;Swift',
       'HTML/CSS;JavaScript;PHP;SQL', ...,
       'Elixir;HTML/CSS;JavaScript;Lua;PowerShell;Python;Ruby;SQL',
       'Bash/Shell (all shells);C;C#;C++;Delphi;Groovy;HTML/CSS;Java;JavaScript;Objective-C;PowerShell;Python;SQL;Swift;TypeScript',
       'C;C++;Go;Lua;Objective-C;Python;Rust;SQL'], dtype=object)

In [63]:
df.dropna(subset=['LanguageHaveWorkedWith'], inplace=True)

In [64]:
df.shape

(44428, 115)

### Programming Languages Want To Work With

In [65]:
df['LanguageWantToWorkWith'].isnull().sum()

2820

In [66]:
df['LanguageWantToWorkWith'].unique()

array(['C#;Rust;Scala', nan, 'HTML/CSS;JavaScript;PHP;SQL', ...,
       'Dart;Elixir;Go;Lua;Python;Ruby;SQL;Zig', 'Elixir;Rust;Swift',
       'Bash/Shell (all shells);C#;Go;HTML/CSS;Java;JavaScript;Kotlin;Objective-C;Python;Rust;SQL;Swift;TypeScript'],
      dtype=object)

In [67]:
df.dropna(subset=['LanguageWantToWorkWith'], inplace=True)

In [68]:
df.shape

(41608, 115)

### Platform Worked With

In [69]:
df['PlatformHaveWorkedWith'].isnull().sum()

10547

In [70]:
df['PlatformHaveWorkedWith'].fillna('Missing', inplace=True)

In [71]:
df.shape

(41608, 115)

### Platform Want To Work With

In [72]:
df['PlatformWantToWorkWith'].isnull().sum()

14761

In [73]:
df['PlatformWantToWorkWith'].fillna('Unknown', inplace=True)

In [74]:
df.shape

(41608, 115)

## 7. Organisational Information

### Size of Organisation

In [75]:
df['OrgSize'].isnull().sum()

123

In [76]:
df.dropna(subset=['OrgSize'], inplace=True)

In [77]:
df.shape

(41485, 115)

In [78]:
df['OrgSize'].unique()

array(['100 to 499 employees',
       'Just me - I am a freelancer, sole proprietor, etc.',
       '10 to 19 employees', '20 to 99 employees',
       '5,000 to 9,999 employees', '1,000 to 4,999 employees',
       'I don’t know', '10,000 or more employees', '2 to 9 employees',
       '500 to 999 employees'], dtype=object)

### Purchase Influence

In [79]:
df['PurchaseInfluence'].unique()

array(['I have some influence', 'I have little or no influence',
       'I have a great deal of influence', nan], dtype=object)

In [80]:
df['PurchaseInfluence'].isnull().sum()

113

In [81]:
df.dropna(subset=['PurchaseInfluence'], inplace=True)

In [82]:
df.shape

(41372, 115)

### Buy New Tool

In [83]:
df['BuyNewTool'].unique()

array(['Start a free trial;Ask developers I know/work with;Ask a generative AI tool;Visit developer communities like Stack Overflow;Read ratings or reviews on third party sites like G2 Crowd',
       'Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow',
       'Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow;Read ratings or reviews on third party sites like G2 Crowd;Research companies that have advertised on sites I visit;Research companies that have emailed me',
       'Ask developers I know/work with;Research companies that have emailed me',
       'Ask developers I know/work with;Read ratings or reviews on third party sites like G2 Crowd',
       nan, 'Ask developers I know/work with',
       'Start a free trial;Research companies that have advertised on sites I visit;Research companies that have emailed me',
       'Start a free trial;Ask developers I know/work with;Ask a generative A

In [84]:
df['BuyNewTool'].isnull().sum()

1744

In [85]:
df['BuyNewTool'].fillna('Unknown', inplace=True)

## 8. Time-Related Information

### TimeSearching

In [86]:
df['TimeSearching'].unique()

array(['30-60 minutes a day', nan, 'Less than 15 minutes a day',
       '60-120 minutes a day', '15-30 minutes a day',
       'Over 120 minutes a day'], dtype=object)

In [87]:
df['TimeSearching'].isnull().sum()

15731

In [88]:
df['TimeSearching'].fillna('Unknown', inplace=True)

### TimeAnswering

In [89]:
df['TimeAnswering'].unique()

array(['60-120 minutes a day', nan, 'Less than 15 minutes a day',
       '30-60 minutes a day', '15-30 minutes a day',
       'Over 120 minutes a day'], dtype=object)

In [90]:
df['TimeAnswering'].isnull().sum()

15785

In [91]:
df['TimeAnswering'].fillna('Unknown', inplace=True)

## 9. Artificial Intelligence Impact

### AISelect,Do you currently use AI tools in your development process?

In [92]:
df['AISelect'].unique()

array(['Yes', "No, and I don't plan to", 'No, but I plan to soon', nan],
      dtype=object)

In [93]:
df['AISelect'].isnull().sum()

157

In [94]:
df['AISelect'].fillna('Unknown', inplace=True)

QID315,AISent,How favorable is your stance on using AI tools as part of your development workflow?,FALSE,MC,SAVR
QID324,AIBen,"For the AI tools you use as part of your development workflow, what are the MOST important benefits you are hoping to achieve? Please check all that apply.",FALSE,MC,MAVR
QID316,AIAcc,How much do you trust the accuracy of the output from AI tools as part of your development workflow?,FALSE,MC,SAVR
QID343,AIComplex,How well do the AI tools you use in your development workflow handle complex tasks?,FALSE,MC,SAVR
QID319,AITool,Which parts of your development workflow are you currently using AI tools for and which are you interested in using AI tools for over the next year?  Please select all that apply.,FALSE,Matrix,Likert
QID320,AINext,"Thinking about how your job and process changes over time, how integrated in your workflow do you anticipate AI tools you are currently using will be 1 year from now?",FALSE,Matrix,Likert
QID338,AIThreat,Do you believe AI is a threat to your current job?,FALSE,MC,SAVR
QID339,AIEthics,Which AI ethical responsibilities are most important to you?  Select all that apply.,FALSE,MC,MAVR
QID346,AIChallenges,What are the challenges to your company/whole team using AI code assistants or GenAI tools? Select all that apply.,FALSE,MC,MAVR


### AISent,How favorable is your stance on using AI tools as part of your development workflow?

In [95]:
df['AISent'].unique()

array(['Very favorable', 'Indifferent', nan, 'Favorable', 'Unfavorable',
       'Unsure', 'Very unfavorable'], dtype=object)

In [96]:
df['AISent'].isnull().sum()

10322

In [97]:
df['AISent'].fillna('Unknown', inplace=True)

### AIBen,"For the AI tools you use as part of your development workflow, what are the MOST important benefits you are hoping to achieve? Please check all that apply."

In [98]:
df['AIBen'].unique()

array(['Increase productivity;Greater efficiency;Improve collaboration;Speed up learning;Improve accuracy in coding;Make workload more manageable',
       'Increase productivity', nan,
       'Increase productivity;Greater efficiency',
       'Increase productivity;Speed up learning',
       'Increase productivity;Greater efficiency;Speed up learning;Improve accuracy in coding',
       'Improve accuracy in coding', 'Speed up learning',
       'Increase productivity;Speed up learning;Make workload more manageable',
       'Greater efficiency;Speed up learning', 'Other (please specify):',
       'Increase productivity;Greater efficiency;Make workload more manageable',
       'Greater efficiency;Improve accuracy in coding',
       'Increase productivity;Greater efficiency;Speed up learning',
       'Greater efficiency',
       'Increase productivity;Greater efficiency;Improve accuracy in coding',
       'Increase productivity;Greater efficiency;Speed up learning;Improve accuracy in coding

In [99]:
df['AIBen'].isnull().sum()

16318

In [100]:
df['AIBen'].fillna('Unknown', inplace=True)

### AIAcc,How much do you trust the accuracy of the output from AI tools as part of your development workflow?

In [101]:
df['AIAcc'].unique()

array(['Somewhat trust', 'Somewhat distrust', nan,
       'Neither trust nor distrust', 'Highly distrust', 'Highly trust'],
      dtype=object)

In [102]:
df['AIAcc'].isnull().sum()

16022

In [103]:
df['AIAcc'].fillna('Unknown', inplace=True)

### AIComplex,How well do the AI tools you use in your development workflow handle complex tasks?

In [104]:
df['AIComplex'].unique()

array(['Good, but not great at handling complex tasks',
       'Very poor at handling complex tasks', nan,
       'Neither good or bad at handling complex tasks',
       'Bad at handling complex tasks',
       'Very well at handling complex tasks'], dtype=object)

In [105]:
df['AIComplex'].isnull().sum()

16159

In [106]:
df['AIComplex'].fillna('Unknown', inplace=True)

### AITool,Which parts of your development workflow are you currently using AI tools for and which are you interested in using AI tools for over the next year?  Please select all that apply.

In [107]:
df['AIToolCurrently Using'].unique()

array(['Learning about a codebase;Writing code;Documenting code;Debugging and getting help;Search for answers;Generating content or synthetic data',
       'Writing code', nan, ...,
       'Writing code;Debugging and getting help;Testing code;Committing and reviewing code;Search for answers;Generating content or synthetic data;Other (please specify):',
       'Learning about a codebase;Project planning;Writing code;Predictive analytics;Search for answers;Generating content or synthetic data',
       'Learning about a codebase;Writing code;Committing and reviewing code;Deployment and monitoring;Predictive analytics;Search for answers'],
      dtype=object)

In [108]:
df['AIToolCurrently Using'].isnull().sum()

17004

In [109]:
df['AIToolInterested in Using'].unique()

array(['Project planning;Testing code;Committing and reviewing code;Deployment and monitoring;Predictive analytics',
       nan, 'Learning about a codebase', ...,
       'Learning about a codebase;Documenting code;Deployment and monitoring;Search for answers;Other (please specify):',
       'Learning about a codebase;Documenting code;Testing code;Committing and reviewing code;Deployment and monitoring;Generating content or synthetic data;Other (please specify):',
       'Debugging and getting help;Testing code;Generating content or synthetic data;Other (please specify):'],
      dtype=object)

In [110]:
df['AIToolInterested in Using'].isnull().sum()

19754

In [111]:
df['AIToolNot interested in Using'].unique()

array([nan,
       'Project planning;Committing and reviewing code;Deployment and monitoring;Predictive analytics',
       'Project planning;Deployment and monitoring;Predictive analytics;Generating content or synthetic data',
       ...,
       'Learning about a codebase;Project planning;Committing and reviewing code;Predictive analytics;Other (please specify):',
       'Writing code;Documenting code;Testing code;Committing and reviewing code;Deployment and monitoring;Predictive analytics;Generating content or synthetic data',
       'Learning about a codebase;Predictive analytics;Other (please specify):'],
      dtype=object)

In [112]:
df['AIToolNot interested in Using'].isnull().sum()

24057

In [113]:
df[['AIToolCurrently Using', 'AIToolInterested in Using', 'AIToolNot interested in Using']].fillna('Unknown', inplace=True)

### AIThreat,Do you believe AI is a threat to your current job?

In [114]:
df['AIThreat'].unique()

array(['No', nan, "I'm not sure", 'Yes'], dtype=object)

In [115]:
df['AIThreat'].isnull().sum()

10386

In [116]:
df['AIThreat'].fillna('Unknown', inplace=True)

### AIEthics,Which AI ethical responsibilities are most important to you?  Select all that apply.

In [117]:
df['AIEthics'].unique()

array(['Circulating misinformation or disinformation;Missing or incorrect attribution for sources of data;Biased results that do not represent diverse viewpoints',
       "Circulating misinformation or disinformation;Missing or incorrect attribution for sources of data;Biased results that do not represent diverse viewpoints;Imitating a person's likeness;Energy demand",
       nan,
       'Circulating misinformation or disinformation;Missing or incorrect attribution for sources of data;Replacing jobs without options for new employment opportunities',
       'Circulating misinformation or disinformation;Missing or incorrect attribution for sources of data;Energy demand',
       'Circulating misinformation or disinformation;Energy demand',
       "Missing or incorrect attribution for sources of data;Imitating a person's likeness;Energy demand",
       'Other (please specify):',
       "Circulating misinformation or disinformation;Missing or incorrect attribution for sources of data;Replac

In [118]:
df['AIEthics'].isnull().sum()

12147

In [119]:
df['AIEthics'].fillna('Unknown', inplace=True)

### AIChallenges,What are the challenges to your company/whole team using AI code assistants or GenAI tools? Select all that apply.

In [120]:
df['AIChallenges'].unique()

array(['AI tools lack context of codebase,  internal architecture, and/or company knowledge;Lack of proper training and education on new tools',
       'Don’t trust the output or answers', nan,
       'AI tools lack context of codebase,  internal architecture, and/or company knowledge;Lack of proper training and education on new tools;They create more work (more code/PRs to review, etc.)',
       'Don’t trust the output or answers;Lack of executive buy-in;Not everyone uses them;They create more work (more code/PRs to review, etc.)',
       'AI tools lack context of codebase,  internal architecture, and/or company knowledge;Lack of proper training and education on new tools;Not everyone uses them',
       'Don’t trust the output or answers;AI tools lack context of codebase,  internal architecture, and/or company knowledge',
       'Don’t trust the output or answers;AI tools lack context of codebase,  internal architecture, and/or company knowledge;Not everyone uses them;They create more

In [121]:
df['AIChallenges'].isnull().sum()

13999

In [122]:
df['AIChallenges'].fillna('Unknown', inplace=True)

In [123]:
cols = [
    "ResponseId", "Age", "EdLevel", "YearsCode", "YearsCodePro", "WorkExp", "Country", "JobSat", "MainBranch", "Employment", "RemoteWork", 
    "CompTotalUSD", "DevType", "LanguageHaveWorkedWith", "LanguageWantToWorkWith", "PlatformHaveWorkedWith", "PlatformWantToWorkWith", 
    "OrgSize", "PurchaseInfluence", "BuyNewTool", "TimeSearching", "TimeAnswering", "AISelect", "AISent", "AIBen", "AIAcc", "AIComplex", 
    "AIToolCurrently Using", "AIToolInterested in Using", "AIToolNot interested in Using", "AIThreat", "AIEthics", "AIChallenges",
]

df_clean = df[cols].copy()

In [124]:
df_clean.describe()

Unnamed: 0,ResponseId,WorkExp,JobSat,CompTotalUSD
count,41372.0,41372.0,41372.0,41372.0
mean,29573.088852,7.064101,6.959949,53022.808735
std,17415.143146,8.945272,1.633585,54953.950162
min,73.0,0.0,0.0,0.0
25%,14704.75,0.0,7.0,0.0
50%,29015.5,4.0,7.0,42000.0
75%,43842.5,11.0,8.0,84800.0
max,65436.0,50.0,10.0,225000.0


In [125]:
df_clean

Unnamed: 0,ResponseId,Age,EdLevel,YearsCode,YearsCodePro,WorkExp,Country,JobSat,MainBranch,Employment,...,AISent,AIBen,AIAcc,AIComplex,AIToolCurrently Using,AIToolInterested in Using,AIToolNot interested in Using,AIThreat,AIEthics,AIChallenges
72,73,18-24 years old,"Secondary school (e.g. American high school, G...",3,1,3.0,Pakistan,10.0,I am a developer by profession,"Employed, full-time;Student, full-time;Indepen...",...,Very favorable,Increase productivity;Greater efficiency;Impro...,Somewhat trust,"Good, but not great at handling complex tasks",Learning about a codebase;Writing code;Documen...,Project planning;Testing code;Committing and r...,,No,Circulating misinformation or disinformation;M...,"AI tools lack context of codebase, internal a..."
367,368,65 years or older,Some college/university study without earning ...,15,9,0.0,United States of America,7.0,"I am not primarily a developer, but I write co...","Independent contractor, freelancer, or self-em...",...,Indifferent,Increase productivity,Somewhat distrust,Very poor at handling complex tasks,Writing code,,,No,Circulating misinformation or disinformation;M...,Don’t trust the output or answers
374,375,25-34 years old,"Professional degree (JD, MD, Ph.D, Ed.D, etc.)",12,6,0.0,Austria,7.0,"I am not primarily a developer, but I write co...","Employed, full-time",...,Unknown,Unknown,Unknown,Unknown,,,,Unknown,Unknown,Unknown
377,378,35-44 years old,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11,11,0.0,Brazil,7.0,I am a developer by profession,"Employed, full-time",...,Favorable,Increase productivity,Neither trust nor distrust,Neither good or bad at handling complex tasks,Search for answers,,,No,Circulating misinformation or disinformation;M...,"AI tools lack context of codebase, internal a..."
379,380,35-44 years old,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",15,6,7.0,Turkey,10.0,I am a developer by profession,"Employed, full-time",...,Very favorable,Increase productivity;Greater efficiency,Somewhat trust,Bad at handling complex tasks,Writing code;Documenting code;Debugging and ge...,Learning about a codebase,Project planning;Committing and reviewing code...,No,Circulating misinformation or disinformation;M...,Don’t trust the output or answers;Lack of exec...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65396,65397,18-24 years old,"Secondary school (e.g. American high school, G...",3,3,0.0,Lithuania,7.0,I am a developer by profession,"Employed, full-time;Independent contractor, fr...",...,Unknown,Unknown,Unknown,Unknown,,,,Unknown,Unknown,Unknown
65401,65402,25-34 years old,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",10,7,0.0,France,7.0,I am a developer by profession,"Employed, full-time",...,Unknown,Unknown,Unknown,Unknown,,,,Unknown,Unknown,Unknown
65408,65409,25-34 years old,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",13,9,9.0,France,7.0,I am a developer by profession,"Employed, full-time",...,Unknown,Unknown,Unknown,Unknown,,,,Unknown,Unknown,Unknown
65431,65432,45-54 years old,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",38,24,0.0,Belgium,7.0,I am a developer by profession,"Employed, full-time",...,Favorable,Unknown,Unknown,Unknown,,,,I'm not sure,Circulating misinformation or disinformation;M...,Don’t trust the output or answers;AI tools lac...


In [126]:
df_clean.dtypes

ResponseId                         int64
Age                               object
EdLevel                           object
YearsCode                         object
YearsCodePro                      object
WorkExp                          float64
Country                           object
JobSat                           float64
MainBranch                        object
Employment                        object
RemoteWork                        object
CompTotalUSD                       int64
DevType                           object
LanguageHaveWorkedWith            object
LanguageWantToWorkWith            object
PlatformHaveWorkedWith            object
PlatformWantToWorkWith            object
OrgSize                           object
PurchaseInfluence                 object
BuyNewTool                        object
TimeSearching                     object
TimeAnswering                     object
AISelect                          object
AISent                            object
AIBen           

In [127]:
df_clean.to_csv('cleaned/cleaned_data.csv', index=False)