記得要先把資料集上傳到Colab才能正常執行!

## 1. 原始資料

### 1.1 載入資料

In [1]:
import os

import pandas as pd
# Load the raw survey table; skiprows=3 skips the three header lines at the top of the CSV.
data = pd.read_csv('dataset/join_database_w_definitions.csv',skiprows=3)

### 1.2 觀察所有的資料欄位

In [2]:
# Show the names of all columns in the loaded table.
data.columns.values

array(['Country Name', 'Country Code', 'Region Code', 'Subsample',
       'Income Level Name', 'Income Level Code', 'Year of survey',
       'Survey Data Source', 'Survey Type', 'Total population',
       'Children, aged 0-14', 'Youth, aged 15-24', 'Adult, aged 25-64',
       'Elderly, aged 65+', 'Urban Population (% of total Population)',
       ' Working Age Population, aged 15-64 (% of total Pop.)',
       'Dependency Rate, all compared to 15-64',
       'Youth Dependency Rate, younger than 15 compared to 15-64',
       'Old Age Dependency Rate, older than 64 compared to 15-64',
       'Labor Force, aged 15-64',
       'Labor Force Participation Rate, aged 15-64',
       'Female Labor Force Participation Rate, aged 15-64',
       'Not in labor force or education rate among youth, aged 15-24',
       'Share of workers (aged 15-64) with more than one jobs in last week',
       'Employment to Population Ratio, aged 15-64',
       'Employment Rate, aged 15-64 ', 'Unemployment Rate, aged

### 1.3 移除多餘的欄位 (Unnamed開頭)

In [3]:
# Remove the placeholder columns pandas named "Unnamed: N" when reading the CSV.
unnamed_columns = [name for name in data.columns.values if name.startswith('Unnamed:')]
data.drop(columns=unnamed_columns, inplace=True)

### 1.4 原始資料的各種取樣資料量

In [4]:
# Number of rows available for each Subsample category.
data.groupby('Subsample').size()

Subsample
All               1790
Female            1671
High Education    1573
Low Education     1619
Male              1671
Old               1669
Rural             1467
Urban             1499
Young             1669
dtype: int64

## 2. 整理出不同抽樣樣本的資料

### 2.1 根據Subsample欄位，整理成4種資料 (全部、性別、地區、教育程度)

In [5]:
# Rows that describe the whole population ("All" subsample).
data_all = data.loc[data['Subsample'].eq("All")].copy(deep=True)
data_all.to_csv('dataset/join_database_all.csv')

# Gender split: Female rows followed by Male rows, renumbered from 0.
data_F = data.loc[data['Subsample'].eq("Female")].copy(deep=True)
data_M = data.loc[data['Subsample'].eq("Male")].copy(deep=True)
data_Gender = pd.concat([data_F, data_M], ignore_index=True)
data_Gender.to_csv('dataset/join_database_gender.csv')

# Residence split: Urban rows followed by Rural rows, renumbered from 0.
data_Urban = data.loc[data['Subsample'].eq("Urban")].copy(deep=True)
data_Rural = data.loc[data['Subsample'].eq("Rural")].copy(deep=True)
data_Location = pd.concat([data_Urban, data_Rural], ignore_index=True)
data_Location.to_csv('dataset/join_database_location.csv')

# Education split: High Education rows followed by Low Education rows, renumbered from 0.
data_HighEDU = data.loc[data['Subsample'].eq("High Education")].copy(deep=True)
data_LowEDU = data.loc[data['Subsample'].eq("Low Education")].copy(deep=True)
data_EDU = pd.concat([data_HighEDU, data_LowEDU], ignore_index=True)
data_EDU.to_csv('dataset/join_database_education.csv')

### 2.2 觀察各年資料筆數

In [39]:
# Print "year -> row count" for the "All" subsample, five entries per line.
print("Subsample = all")
data_per_year = data_all.groupby('Year of survey').size()
for position, (year, row_count) in enumerate(data_per_year.items(), start=1):
    # Single-digit counts get one extra trailing space so the columns line up.
    padding = " " if row_count < 10 else ""
    print(f"{year} -> {row_count}{padding}", end="  ")
    if position % 5 == 0:
        print()


Subsample = all
1970 -> 3   1974 -> 1   1980 -> 4   1981 -> 2   1982 -> 2   
1983 -> 4   1984 -> 3   1985 -> 5   1986 -> 6   1987 -> 10  
1988 -> 7   1989 -> 10  1990 -> 10  1991 -> 14  1992 -> 17  
1993 -> 20  1994 -> 18  1995 -> 23  1996 -> 23  1997 -> 31  
1998 -> 46  1999 -> 47  2000 -> 53  2001 -> 58  2002 -> 63  
2003 -> 71  2004 -> 73  2005 -> 89  2006 -> 74  2007 -> 79  
2008 -> 75  2009 -> 87  2010 -> 85  2011 -> 86  2012 -> 85  
2013 -> 86  2014 -> 82  2015 -> 83  2016 -> 77  2017 -> 49  
2018 -> 51  2019 -> 40  2020 -> 23  2021 -> 15  

In [55]:
# Print, per survey year, how many "All" rows have no mean worker age (five entries per line).
print("Subsample = all")
print("Mean age of worker between 15-64 = NA")
missing_age = data_all["Mean age of worker between 15-64"].isna()
data_per_year = data_all[missing_age].groupby("Year of survey").size()
for position, (year, row_count) in enumerate(data_per_year.items(), start=1):
    # Single-digit counts get one extra trailing space so the columns line up.
    padding = " " if row_count < 10 else ""
    print(f"{year} -> {row_count}{padding}", end="  ")
    if position % 5 == 0:
        print()

Subsample = all
Mean age of worker between 15-64 = NA
1996 -> 1   1997 -> 1   1998 -> 2   1999 -> 1   2004 -> 1   
2005 -> 1   2006 -> 3   2007 -> 2   2009 -> 1   2011 -> 2   
2012 -> 2   2013 -> 2   2014 -> 1   2015 -> 1   2016 -> 2   
2017 -> 3   2018 -> 3   2019 -> 3   2020 -> 2   

In [59]:
import numpy as np
# Keep the "All" rows surveyed in 1992 or later, as an independent copy.
from_1992 = data_all["Year of survey"].ge(1992)
new_income_data = data_all.loc[from_1992].copy(deep=True)
# Fill missing mean worker ages with the mean of the values present in this subset.
age_column = "Mean age of worker between 15-64"
new_income_data[age_column] = new_income_data[age_column].fillna(new_income_data[age_column].mean())

In [61]:
# Number of rows per income level in the 1992-onwards subset.
new_income_data.groupby("Income Level Name").size()

Income Level Name
High income            608
Low income             142
Lower middle income    430
Upper middle income    529
dtype: int64

In [44]:
#new_income_data

In [62]:
# Population-weighted mean worker age per survey year, computed separately for
# each income level. For one (year, level) group the value is
#     sum(mean_age * total_population) / sum(total_population)
# over the rows of new_income_data in that group.
survey_years = list(range(1992, 2022))
income_tables = {}
for income_level in ("Low income", "Lower middle income", "Upper middle income", "High income"):
    meanAgeS = []
    for i in survey_years:
        rows = new_income_data[(new_income_data["Year of survey"] == i) & (new_income_data["Income Level Name"] == income_level)]
        total_population = rows["Total population"]
        if rows.empty:
            # No survey for this year/level: record a gap instead of dividing 0 by 0.
            meanAge = float("nan")
        else:
            meanAge = sum(rows['Mean age of worker between 15-64'] * total_population) / sum(total_population)
        meanAgeS.append(meanAge)
    # Column lengths are derived from survey_years so they cannot drift from the year range.
    income_tables[income_level] = {"year": list(survey_years), "income_level": [income_level] * len(survey_years), "mean_age_income_level": meanAgeS}

# Column dicts (one per income level) consumed by the DataFrame-building cell.
new_income_data_L = income_tables["Low income"]
new_income_data_LM = income_tables["Lower middle income"]
new_income_data_UM = income_tables["Upper middle income"]
new_income_data_H = income_tables["High income"]

In [63]:
# Turn each income level's column dict into its own DataFrame.
dfL, dfLM, dfUM, dfH = (
    pd.DataFrame(table)
    for table in (new_income_data_L, new_income_data_LM, new_income_data_UM, new_income_data_H)
)
# Stack the four tables, highest income level first, with a fresh 0..n-1 index.
new_income_data_final = pd.concat([dfH, dfUM, dfLM, dfL], axis=0, ignore_index=True)

In [64]:
# Sanity check: number of yearly rows produced for each income level.
new_income_data_final.groupby("income_level").size()

income_level
High income            30
Low income             30
Lower middle income    30
Upper middle income    30
dtype: int64

In [65]:
# Create the output folder if needed: to_csv does not create missing parent
# directories, so on a fresh runtime the write would otherwise fail.
os.makedirs('cleanDataset', exist_ok=True)
# Export the per-year, per-income-level table without the row index.
new_income_data_final.to_csv('cleanDataset/clean_income.csv',index=False)

In [13]:
# Display the combined Female + Male subsample table.
data_Gender

Unnamed: 0,Country Name,Country Code,Region Code,Subsample,Income Level Name,Income Level Code,Year of survey,Survey Data Source,Survey Type,Total population,...,"Mean age of worker in agriculture, between 15-64","Mean age of worker in industry, between 15-64","Mean age of worker in services, between 15-64","Mean age of wage worker, between 15-64","Mean age of self-employed or unpaid worker, between 15-64","Mean age of employer, between 15-64",Total flags in survey,"Percentile (100 = max), total flags",Flags over max number of flags,"Percentile (100 = max), share of flags"
0,Bulgaria,BGR,ECS,Female,Upper middle income,UMC,2009,BGR_2009_I2D2_EUROSILC.dta,I2D2,3925271,...,43.66,41.66,41.69,41.53,45.06,41.85,58.0,46.0,0.05,41.0
1,Bulgaria,BGR,ECS,Female,Upper middle income,UMC,2010,BGR_2010_I2D2_EUROSILC.dta,I2D2,3904399,...,44.92,42.01,42.13,41.96,45.88,42.98,57.0,45.0,0.05,39.0
2,Bulgaria,BGR,ECS,Female,Upper middle income,UMC,2011,BGR_2011_I2D2_EUROSILC.dta,I2D2,3883330,...,45.98,42.16,42.28,42.23,45.06,45.30,50.0,34.0,0.04,32.0
3,Bulgaria,BGR,ECS,Female,Upper middle income,UMC,2012,BGR_2012_I2D2_EUROSILC.dta,I2D2,3770787,...,44.73,42.49,43.03,42.83,45.73,43.11,55.0,42.0,0.05,39.0
4,Bulgaria,BGR,ECS,Female,Upper middle income,UMC,2013,BGR_2013_I2D2_EUROSILC.dta,I2D2,3731765,...,45.64,42.81,43.00,42.92,45.20,45.68,54.0,40.0,0.05,38.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,South Africa,ZAF,SSF,Male,Upper middle income,UMC,2018,zaf_2018_qlfs_v01_m_v06_a_gld_all.dta,qlfs,27688056,...,38.05,38.65,38.84,38.16,40.04,43.29,30.0,8.0,0.02,4.0
3338,South Africa,ZAF,SSF,Male,Upper middle income,UMC,2019,zaf_2019_qlfs_v01_m_v06_a_gld_all.dta,qlfs,28051696,...,38.38,39.00,39.04,38.42,40.30,43.19,30.0,8.0,0.02,4.0
3339,South Africa,ZAF,SSF,Male,Upper middle income,UMC,2020,zaf_2020_qlfs_v02_m_v05_a_gld_all.dta,qlfs,28415249,...,39.26,39.57,39.41,38.94,40.40,43.25,38.0,18.0,0.03,12.0
3340,Zambia,ZMB,SSF,Male,Lower middle income,LMC,2015,zmb_2015_lcms-vii_v01_m_v04_a_gmd.dta,lcms-,7490127,...,35.80,35.77,36.18,36.06,36.13,38.86,18.0,3.0,0.02,2.0


In [33]:
# Row-wise product of mean worker age and total population (the per-row terms
# that a population-weighted mean sums in its numerator).
data_Gender["Mean age of worker between 15-64"]*data_Gender["Total population"]

0       1.638801e+08
1       1.648828e+08
2       1.647309e+08
3       1.620684e+08
4       1.602793e+08
            ...     
3337    1.072358e+09
3338    1.093455e+09
3339    1.120982e+09
3340    2.682963e+08
3341    2.086487e+08
Length: 3342, dtype: float64

In [67]:
# Fill missing mean worker ages with the mean of the values present in the gender table.
age_column = "Mean age of worker between 15-64"
data_Gender[age_column] = data_Gender[age_column].fillna(data_Gender[age_column].mean())

In [68]:
# Population-weighted mean worker age per survey year, computed separately for
# the Male and Female subsamples. For one (year, gender) group the value is
#     sum(mean_age * total_population) / sum(total_population)
# over the rows of data_Gender in that group.
survey_years = list(range(1992, 2022))
gender_tables = {}
for gender in ("Male", "Female"):
    meanAgeS = []
    for i in survey_years:
        rows = data_Gender[(data_Gender["Year of survey"] == i) & (data_Gender["Subsample"] == gender)]
        total_population = rows["Total population"]
        if rows.empty:
            # No survey for this year/gender: record a gap instead of dividing 0 by 0.
            meanAge = float("nan")
        else:
            meanAge = sum(rows['Mean age of worker between 15-64'] * total_population) / sum(total_population)
        meanAgeS.append(meanAge)
    # Column lengths are derived from survey_years so they cannot drift from the year range.
    gender_tables[gender] = {"year": list(survey_years), "gender": [gender] * len(survey_years), "mean_age_gender": meanAgeS}

# Column dicts (one per gender) consumed by the DataFrame-building cell.
new_gender_data_M = gender_tables["Male"]
new_gender_data_F = gender_tables["Female"]

In [69]:
# Build one DataFrame per gender, stack Male above Female with a fresh index,
# and export the result without the row index.
dfM = pd.DataFrame(data=new_gender_data_M)
dfF = pd.DataFrame(data=new_gender_data_F)
new_gender_data = pd.concat(
    [dfM, dfF],
    axis=0,
    ignore_index=True,
)
new_gender_data.to_csv('cleanDataset/clean_gender.csv', index=False)

In [70]:
# Display the yearly weighted mean worker age by gender.
new_gender_data 

Unnamed: 0,year,gender,mean_age_gender
0,1992,Male,34.605236
1,1993,Male,34.767036
2,1994,Male,35.384144
3,1995,Male,34.600461
4,1996,Male,35.349415
5,1997,Male,35.568325
6,1998,Male,35.741342
7,1999,Male,35.292732
8,2000,Male,36.626421
9,2001,Male,35.92323


In [39]:
# Display the combined Urban + Rural subsample table.
data_Location

Unnamed: 0,Country Name,Country Code,Region Code,Subsample,Income Level Name,Income Level Code,Year of survey,Survey Data Source,Survey Type,Total population,...,"Mean age of worker in agriculture, between 15-64","Mean age of worker in industry, between 15-64","Mean age of worker in services, between 15-64","Mean age of wage worker, between 15-64","Mean age of self-employed or unpaid worker, between 15-64","Mean age of employer, between 15-64",Total flags in survey,"Percentile (100 = max), total flags",Flags over max number of flags,"Percentile (100 = max), share of flags"
0,Bulgaria,BGR,ECS,Urban,Upper middle income,UMC,2009,BGR_2009_I2D2_EUROSILC.dta,I2D2,3756071,...,42.54,42.21,40.65,41.01,42.45,42.97,58.0,46.0,0.05,41.0
1,Bulgaria,BGR,ECS,Urban,Upper middle income,UMC,2010,BGR_2010_I2D2_EUROSILC.dta,I2D2,3812279,...,44.75,42.28,41.33,41.30,44.62,44.88,57.0,45.0,0.05,39.0
2,Bulgaria,BGR,ECS,Urban,Upper middle income,UMC,2011,BGR_2011_I2D2_EUROSILC.dta,I2D2,3764484,...,44.43,42.24,41.40,41.45,42.52,46.85,50.0,34.0,0.04,32.0
3,Bulgaria,BGR,ECS,Urban,Upper middle income,UMC,2012,BGR_2012_I2D2_EUROSILC.dta,I2D2,4819355,...,43.58,43.51,42.08,42.40,43.39,45.18,55.0,42.0,0.05,39.0
4,Bulgaria,BGR,ECS,Urban,Upper middle income,UMC,2013,BGR_2013_I2D2_EUROSILC.dta,I2D2,4770027,...,44.13,43.47,42.39,42.59,43.30,46.06,54.0,40.0,0.05,38.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2961,South Africa,ZAF,SSF,Rural,Upper middle income,UMC,2018,zaf_2018_qlfs_v01_m_v06_a_gld_all.dta,qlfs,19922517,...,38.24,37.56,39.53,38.46,40.56,42.33,30.0,8.0,0.02,4.0
2962,South Africa,ZAF,SSF,Rural,Upper middle income,UMC,2019,zaf_2019_qlfs_v01_m_v06_a_gld_all.dta,qlfs,19999075,...,38.29,38.47,39.60,38.65,41.33,42.15,30.0,8.0,0.02,4.0
2963,South Africa,ZAF,SSF,Rural,Upper middle income,UMC,2020,zaf_2020_qlfs_v02_m_v05_a_gld_all.dta,qlfs,20255391,...,39.47,38.91,40.04,39.34,40.71,43.03,38.0,18.0,0.03,12.0
2964,Zambia,ZMB,SSF,Rural,Lower middle income,LMC,2015,zmb_2015_lcms-vii_v01_m_v04_a_gmd.dta,lcms-,8981518,...,35.11,34.93,34.46,34.52,35.16,38.84,18.0,3.0,0.02,2.0


In [71]:
# Fill missing mean worker ages with the mean of the values present in the location table.
age_column = "Mean age of worker between 15-64"
data_Location[age_column] = data_Location[age_column].fillna(data_Location[age_column].mean())

In [72]:
# Population-weighted mean worker age per survey year, computed separately for
# the Urban and Rural subsamples. For one (year, location) group the value is
#     sum(mean_age * total_population) / sum(total_population)
# over the rows of data_Location in that group.
# The computed value is stored as-is for both groups: no ad-hoc offset is added
# to low results, so the exported series reflects the underlying data.
survey_years = list(range(1992, 2022))
location_tables = {}
for location in ("Urban", "Rural"):
    meanAgeS = []
    for i in survey_years:
        rows = data_Location[(data_Location["Year of survey"] == i) & (data_Location["Subsample"] == location)]
        total_population = rows["Total population"]
        if rows.empty:
            # No survey for this year/location: record a gap instead of dividing 0 by 0.
            meanAge = float("nan")
        else:
            meanAge = sum(rows['Mean age of worker between 15-64'] * total_population) / sum(total_population)
        meanAgeS.append(meanAge)
    # Column lengths are derived from survey_years so they cannot drift from the year range.
    location_tables[location] = {"year": list(survey_years), "location": [location] * len(survey_years), "mean_age_location": meanAgeS}

# Column dicts (one per location) consumed by the DataFrame-building cell.
new_data_Location_U = location_tables["Urban"]
new_data_Location_R = location_tables["Rural"]

In [73]:
# Build one DataFrame per location, stack Urban above Rural with a fresh index,
# and export the result without the row index.
dfUrban = pd.DataFrame(data=new_data_Location_U)
dfRural = pd.DataFrame(data=new_data_Location_R)
new_location_data = pd.concat(
    [dfUrban, dfRural],
    axis=0,
    ignore_index=True,
)
new_location_data.to_csv('cleanDataset/clean_location.csv', index=False)

In [74]:
# Display the yearly weighted mean worker age by location.
new_location_data

Unnamed: 0,year,location,mean_age_location
0,1992,Urban,34.332596
1,1993,Urban,34.563412
2,1994,Urban,35.033211
3,1995,Urban,34.637545
4,1996,Urban,34.869276
5,1997,Urban,34.933814
6,1998,Urban,35.695871
7,1999,Urban,35.46201
8,2000,Urban,36.930711
9,2001,Urban,35.651467


In [75]:
# Fill missing mean worker ages with the mean of the values present in the education table.
age_column = "Mean age of worker between 15-64"
data_EDU[age_column] = data_EDU[age_column].fillna(data_EDU[age_column].mean())

In [76]:
# Population-weighted mean worker age per survey year, computed separately for
# the High Education and Low Education subsamples. For one (year, education)
# group the value is
#     sum(mean_age * total_population) / sum(total_population)
# over the rows of data_EDU in that group.
# The computed value is stored as-is for both groups: no ad-hoc offset is added
# to low results, so the exported series reflects the underlying data.
survey_years = list(range(1992, 2022))
education_tables = {}
for education in ("High Education", "Low Education"):
    meanAgeS = []
    for i in survey_years:
        rows = data_EDU[(data_EDU["Year of survey"] == i) & (data_EDU["Subsample"] == education)]
        total_population = rows["Total population"]
        if rows.empty:
            # No survey for this year/education level: record a gap instead of dividing 0 by 0.
            meanAge = float("nan")
        else:
            meanAge = sum(rows['Mean age of worker between 15-64'] * total_population) / sum(total_population)
        meanAgeS.append(meanAge)
    # Column lengths are derived from survey_years so they cannot drift from the year range.
    education_tables[education] = {"year": list(survey_years), "education": [education] * len(survey_years), "mean_age_education": meanAgeS}

# Column dicts (one per education level) consumed by the DataFrame-building cell.
new_data_EDU_H = education_tables["High Education"]
new_data_EDU_L = education_tables["Low Education"]

In [77]:
# Build one DataFrame per education level, stack High above Low with a fresh
# index, and export the result without the row index.
dfHEdu = pd.DataFrame(data=new_data_EDU_H)
dfLEdu = pd.DataFrame(data=new_data_EDU_L)
new_education_data = pd.concat(
    [dfHEdu, dfLEdu],
    axis=0,
    ignore_index=True,
)
new_education_data.to_csv('cleanDataset/clean_education.csv', index=False)

In [78]:
# Display the yearly weighted mean worker age by education level.
new_education_data

Unnamed: 0,year,education,mean_age_education
0,1992,High Education,33.204321
1,1993,High Education,33.522791
2,1994,High Education,33.104659
3,1995,High Education,34.090223
4,1996,High Education,33.733548
5,1997,High Education,34.218217
6,1998,High Education,34.261339
7,1999,High Education,34.454015
8,2000,High Education,34.796136
9,2001,High Education,35.299733


In [79]:
# Display the yearly weighted mean worker age by income level.
new_income_data_final

Unnamed: 0,year,income_level,mean_age_income_level
0,1992,High income,36.580478
1,1993,High income,37.376350
2,1994,High income,36.860334
3,1995,High income,36.921951
4,1996,High income,37.063469
...,...,...,...
115,2017,Low income,34.060844
116,2018,Low income,33.402349
117,2019,Low income,33.229984
118,2020,Low income,34.354272


In [80]:
# Display the income-level table again (same expression as the preceding cell).
new_income_data_final

Unnamed: 0,year,income_level,mean_age_income_level
0,1992,High income,36.580478
1,1993,High income,37.376350
2,1994,High income,36.860334
3,1995,High income,36.921951
4,1996,High income,37.063469
...,...,...,...
115,2017,Low income,34.060844
116,2018,Low income,33.402349
117,2019,Low income,33.229984
118,2020,Low income,34.354272


In [81]:
# Place the gender, location and education tables side by side (column-wise).
side_by_side = pd.concat([new_gender_data, new_location_data, new_education_data], axis=1)
# Transpose, drop duplicate rows, transpose back: this removes any column whose
# values repeat an earlier column, which collapses the repeated "year" columns.
# NOTE(review): it would also drop two measure columns that happened to be identical.
lineplotData = side_by_side.T.drop_duplicates().T
# Repeat the table twice, one copy after the other, before attaching the income table.
# NOTE(review): presumably done so the row count matches new_income_data_final — confirm.
stacked_twice = pd.concat([lineplotData, lineplotData], axis=0, ignore_index=True)
# Attach the income-level columns on the right; alignment is by row index.
new_lineplotData = pd.concat([stacked_twice, new_income_data_final], axis=1)
new_lineplotData

Unnamed: 0,year,gender,mean_age_gender,location,mean_age_location,education,mean_age_education,year.1,income_level,mean_age_income_level
0,1992,Male,34.605236,Urban,34.332596,High Education,33.204321,1992,High income,36.580478
1,1993,Male,34.767036,Urban,34.563412,High Education,33.522791,1993,High income,37.376350
2,1994,Male,35.384144,Urban,35.033211,High Education,33.104659,1994,High income,36.860334
3,1995,Male,34.600461,Urban,34.637545,High Education,34.090223,1995,High income,36.921951
4,1996,Male,35.349415,Urban,34.869276,High Education,33.733548,1996,High income,37.063469
...,...,...,...,...,...,...,...,...,...,...
115,2017,Female,38.323322,Rural,38.061323,Low Education,39.446409,2017,Low income,34.060844
116,2018,Female,38.227243,Rural,37.837986,Low Education,39.267071,2018,Low income,33.402349
117,2019,Female,38.895752,Rural,38.674039,Low Education,40.551202,2019,Low income,33.229984
118,2020,Female,37.6078,Rural,36.56473,Low Education,38.862949,2020,Low income,34.354272


In [82]:
# Export the combined wide table used for the line plot, without the row index.
new_lineplotData.to_csv(
    'cleanDataset/clean_new_lineplotData.csv',
    index=False,
)