<a href="https://colab.research.google.com/github/anindabitm/PDFM-use-case/blob/main/PDFM_embeddings_to_predict_PM2_5_in_US.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Predicting US PM2.5 levels using Google's Population Dynamics Foundation Model**

Useful Resources:
1. https://github.com/opengeos/GeoAI-Tutorials/blob/main/docs/PDFM/zillow_home_value.ipynb
2. https://github.com/google-research/population-dynamics/tree/master/notebooks

Acknowledgements:
This notebook is based on tutorials - [PDFM notebook](https://github.com/google-research/population-dynamics/tree/master/notebooks) and awesome tutorial by giswqs opengeos PDFM [zillow home price](https://github.com/opengeos/GeoAI-Tutorials/blob/main/docs/PDFM/zillow_home_value.ipynb)

In [2]:
%%capture
!pip install leafmap

In [3]:
#import libraries
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from leafmap.common import evaluate_model, plot_actual_vs_predicted, download_file

# Get US PM2.5 data
Link to data: https://usc-geohealth-hub-uscssi.hub.arcgis.com/documents/7fc448343d6643f3bb13157fd65aed4f/about

In [4]:
df0 = pd.read_excel("/content/pm25_and_disparity.xlsx",sheet_name="data_part1")
df1 = pd.read_excel("/content/pm25_and_disparity.xlsx",sheet_name="data_part2")
df2 = pd.read_excel("/content/pm25_and_disparity.xlsx",sheet_name="data_part3")
df3 = pd.read_excel("/content/pm25_and_disparity.xlsx",sheet_name="data_part4")
df4 = pd.read_excel("/content/pm25_and_disparity.xlsx",sheet_name="data_part5")

# Process PM2.5 data

In [5]:
df = pd.concat([df0,df1,df2,df3,df4],ignore_index=True)
df.head()

Unnamed: 0.1,Unnamed: 0,year,ZIP,zcta,popdensity,population,poverty,education,pct_blk,pct_hisp,pct_native,pct_asian,pct_white,black_pop,hisp_pop,native_pop,asian_pop,white_pop,medhouseholdincome,pm25,urban
0,1,2000,1,,,,,,,,,,,,,,,,,,
1,2,2001,1,,,,,,,,,,,,,,,,,,
2,3,2002,1,,,,,,,,,,,,,,,,,,
3,4,2003,1,,,,,,,,,,,,,,,,,,
4,5,2004,1,,,,,,,,,,,,,,,,,,


In [6]:
df.shape

(789260, 21)

In [7]:
df["zcta"].nunique()

32406

In [8]:
pm25_df = df.groupby(["zcta"]).mean()["pm25"]
pm25_df.head()

Unnamed: 0_level_0,pm25
zcta,Unnamed: 1_level_1
601.0,
602.0,
603.0,
606.0,
610.0,


In [9]:
pm25_df.dropna(axis=0,inplace=True)
pm25_df.head()

Unnamed: 0_level_0,pm25
zcta,Unnamed: 1_level_1
1001.0,9.39818
1002.0,8.026795
1003.0,8.94902
1005.0,6.409811
1007.0,7.375929


In [10]:
pm25_df.index = pm25_df.index.astype(int)
print(pm25_df.shape)
pm25_df.head()

(31956,)


Unnamed: 0_level_0,pm25
zcta,Unnamed: 1_level_1
1001,9.39818
1002,8.026795
1003,8.94902
1005,6.409811
1007,7.375929


In [11]:
pm25_df = pm25_df.reset_index(drop=False)  # Remove inplace=True
pm25_df.index = pm25_df["zcta"].apply(lambda x: f"zip/{x}") # Access 'zcta' column
pm25_df.head()

Unnamed: 0_level_0,zcta,pm25
zcta,Unnamed: 1_level_1,Unnamed: 2_level_1
zip/1001,1001,9.39818
zip/1002,1002,8.026795
zip/1003,1003,8.94902
zip/1005,1005,6.409811
zip/1007,1007,7.375929


# Request access to PDFM Embeddings

In [14]:
!unzip /content/pdfm_embeddings.zip

Archive:  /content/pdfm_embeddings.zip
   creating: pdfm_embeddings/
   creating: pdfm_embeddings/v0/
   creating: pdfm_embeddings/v0/us/
  inflating: pdfm_embeddings/v0/us/county.geojson  
  inflating: pdfm_embeddings/v0/us/county_embeddings.csv  
  inflating: pdfm_embeddings/v0/us/zcta.geojson  
  inflating: pdfm_embeddings/v0/us/zcta_embeddings.csv  


In [12]:
embeddings_file_path = "/content/pdfm_embeddings/v0/us/zcta_embeddings.csv"

In [15]:
if not os.path.exists(embeddings_file_path):
    raise FileNotFoundError("Please request the embeddings from Google")

In [16]:
zipcode_embeddings = pd.read_csv(embeddings_file_path).set_index("place")
zipcode_embeddings.head()

Unnamed: 0_level_0,state,county,city,population,latitude,longitude,feature0,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,feature11,feature12,feature13,feature14,feature15,feature16,feature17,feature18,feature19,feature20,feature21,feature22,feature23,feature24,feature25,feature26,feature27,feature28,feature29,feature30,feature31,feature32,feature33,feature34,feature35,feature36,feature37,feature38,feature39,feature40,feature41,feature42,feature43,...,feature280,feature281,feature282,feature283,feature284,feature285,feature286,feature287,feature288,feature289,feature290,feature291,feature292,feature293,feature294,feature295,feature296,feature297,feature298,feature299,feature300,feature301,feature302,feature303,feature304,feature305,feature306,feature307,feature308,feature309,feature310,feature311,feature312,feature313,feature314,feature315,feature316,feature317,feature318,feature319,feature320,feature321,feature322,feature323,feature324,feature325,feature326,feature327,feature328,feature329
place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
zip/97910,OR,Malheur County,Jordan Valley,609,42.749076,-117.511459,-0.138227,1.120377,0.0729,0.297442,0.772673,0.286467,0.802398,0.620847,0.06081,0.125926,0.452905,0.31721,1.560488,0.173717,0.338584,-0.011876,0.369918,0.734241,-0.023161,0.927918,0.131129,0.174915,0.186962,0.777327,-0.125255,0.252997,0.126703,0.282713,0.286217,0.108222,0.138043,1.893893,0.034341,0.116197,1.578618,0.594598,0.083396,0.217705,0.085833,0.050959,0.071389,0.046794,0.900741,0.089737,...,0.795463,4.048826,4.411071,-0.072585,1.172804,2.780721,-0.007037,-0.167074,-0.169071,2.451642,4.705745,2.861044,0.964761,5.425952,-0.086446,-0.004044,1.69268,-0.129304,1.262767,0.584393,4.95798,-0.113619,3.977844,-0.056266,0.154679,6.833614,-0.168595,3.852105,-0.0086,0.168367,2.672679,6.938071,1.462526,4.700379,3.523755,-0.169971,0.279797,-0.03063,-1.4e-05,4.48936,-0.158891,-0.168708,1.231994,-0.155765,3.043214,-0.169749,0.177463,-0.001661,-0.00101,4.495589
zip/89412,NV,Washoe County,Gerlach,98,41.102934,-119.695361,-0.141379,1.422782,0.234269,0.159156,0.890241,0.215427,0.5332,1.12583,0.159891,0.305449,0.673448,0.222371,1.113196,0.147047,0.270858,0.140051,0.591381,1.321858,0.182956,1.600126,0.14948,1.239454,0.326186,0.724274,-0.057049,0.327722,0.225044,0.092865,0.636425,0.541034,0.168866,1.489893,-0.003255,0.524351,1.672642,0.421262,0.701539,0.231094,0.223512,0.13448,0.479852,0.204405,1.043766,0.396896,...,-0.080731,5.357723,3.97349,-0.100555,2.942436,4.120401,-0.027611,-0.084821,-6.8e-05,1.36474,6.08568,5.550473,-0.057929,7.677364,-0.111488,0.442242,2.686013,-0.008651,-0.003287,4.685384,6.11702,-0.083324,4.441121,-0.002397,-0.119943,4.539424,-0.006676,4.993636,-0.118508,-0.169039,1.864953,4.146715,-0.11822,3.688882,4.046134,-0.045537,1.627209,-0.012242,-0.016643,4.668972,-0.157417,-0.043606,2.788701,-0.062547,3.700745,-0.169827,-0.13799,-0.024385,-0.000295,3.399393
zip/88030,NM,Luna County,Deming,24139,32.191634,-107.729431,-0.046666,1.414424,0.146803,1.113256,1.119576,1.093199,0.960636,0.179642,0.729488,2.447439,2.274204,2.765325,0.903284,0.520162,2.604348,0.68852,0.164436,2.755828,1.312246,0.452269,0.61266,1.135295,1.440466,0.507069,-0.140809,0.481306,1.068717,2.188697,0.254398,1.019234,0.277727,0.716491,0.861136,1.232256,0.210326,0.694031,1.504018,1.430361,0.842551,0.222043,1.114556,0.856425,1.518791,1.487212,...,0.472449,4.089555,1.347347,0.128349,2.517892,0.718034,0.546053,2.494347,-0.024888,3.658147,5.096304,3.68795,-0.162521,4.844636,0.789083,3.02567,0.667166,-0.169164,-0.109891,3.095128,4.816823,-0.169883,4.487709,-0.058803,-0.167333,2.843048,-0.060544,3.279263,-0.158699,1.535189,2.787231,3.861916,1.569119,3.487299,2.334693,0.068785,-0.162307,-0.053849,0.160504,1.895565,-0.000654,0.437475,4.229295,0.229199,2.098469,1.150497,0.716122,-0.116499,-0.051163,3.866543
zip/82633,WY,Converse County,Douglas,9478,43.02227,-105.41025,-0.090293,1.26628,0.447868,0.781861,1.731813,0.602722,0.737066,0.743392,0.823658,1.4752,1.639734,0.20234,0.545946,0.486171,0.425758,0.951557,0.448131,0.889409,1.116265,0.331308,0.694245,0.774092,0.893476,2.088896,-0.012767,1.37942,0.541944,0.903094,1.245158,0.514747,0.24052,1.853385,0.691478,1.095086,0.102779,0.976397,1.508152,1.098709,0.658931,1.544933,3.26799,1.033022,0.948243,0.768377,...,4.709711,2.619931,0.377791,-0.13609,1.546929,1.914665,-0.038279,-0.158291,3.846224,1.600872,2.55624,5.028241,3.131569,1.885251,1.723152,3.287659,0.592335,-0.169679,0.799571,1.711086,6.434799,2.259457,5.137226,-0.157376,3.739257,1.849344,0.817178,4.254727,-0.031455,0.860355,3.185768,4.815537,1.889562,3.147158,5.902875,0.248916,-0.013526,-0.035991,-0.037467,2.813852,-0.033771,0.579775,2.688665,0.175669,0.990921,1.644879,0.222517,-0.047864,-4.2e-05,7.453567
zip/59538,MT,Phillips County,Malta,2936,48.112019,-107.84552,-0.092886,1.256203,-0.050897,0.321954,1.281864,0.737793,1.662178,0.451061,0.190265,-0.127765,0.506115,0.792137,0.385507,0.394926,0.477761,1.028206,0.047681,0.87974,0.79573,0.239135,0.282084,0.193326,1.262094,0.453796,-0.169351,0.601323,0.670364,0.581992,0.540012,0.218976,1.195483,0.035199,0.274211,0.681594,0.819916,1.234735,0.289213,-0.010891,0.911312,0.780166,2.906506,0.524723,1.004237,-0.098108,...,0.74141,0.435825,0.415687,-0.168535,1.068465,-0.014837,-0.058268,-0.168225,-0.084327,4.26206,0.444936,2.504024,0.534612,1.366006,0.086276,1.766271,3.652062,-0.162912,-0.102837,2.123431,6.335544,-0.158536,2.916174,-0.000554,4.01317,1.245277,-0.146109,1.630525,0.193676,-0.132476,1.447661,4.800499,-0.009952,0.217168,1.960558,-0.080472,0.211844,-0.045951,-0.012506,-0.169497,-0.169915,-0.088829,0.338914,-0.102962,-0.156583,1.493696,2.259007,-0.161916,-0.001087,0.972243


# Join PDFM embeddings and Groud Truth (PM2.5 data)

In [17]:
data = pm25_df.join(zipcode_embeddings, how="inner")
data.head()

Unnamed: 0,zcta,pm25,state,county,city,population,latitude,longitude,feature0,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,feature11,feature12,feature13,feature14,feature15,feature16,feature17,feature18,feature19,feature20,feature21,feature22,feature23,feature24,feature25,feature26,feature27,feature28,feature29,feature30,feature31,feature32,feature33,feature34,feature35,feature36,feature37,feature38,feature39,feature40,feature41,...,feature280,feature281,feature282,feature283,feature284,feature285,feature286,feature287,feature288,feature289,feature290,feature291,feature292,feature293,feature294,feature295,feature296,feature297,feature298,feature299,feature300,feature301,feature302,feature303,feature304,feature305,feature306,feature307,feature308,feature309,feature310,feature311,feature312,feature313,feature314,feature315,feature316,feature317,feature318,feature319,feature320,feature321,feature322,feature323,feature324,feature325,feature326,feature327,feature328,feature329
zip/10001,10001,13.636975,NY,New York County,New York,26966,40.750672,-73.997281,-0.073025,0.621611,3.204059,1.21925,0.086666,0.422733,1.362252,0.448227,0.780789,0.076176,0.744889,0.206153,1.486222,2.341626,2.934966,3.19765,2.227243,0.536704,1.641904,0.215733,0.084727,0.164679,4.511102,2.252191,-0.024315,0.212265,3.857031,2.217771,0.308766,4.034735,1.695519,0.567224,-0.077346,3.953824,1.425099,2.27584,1.042735,1.490175,1.557218,0.206169,0.049573,2.313412,...,1.501178,-0.123201,-0.16726,-0.159472,-0.001858,-0.12998,0.175728,-1.2e-07,-0.008414,-0.155363,0.315214,-0.00908,-0.0,-0.0,0.770695,-0.152233,-0.049659,8.20949,0.70082,1.000827,5.112181,-0.146953,1.65492,-0.002328,-0.004713,-0.16202,9.814237,2.872081,7.905857,9.632621,7.280638,-0.017287,0.479747,1.191547,6.030477,-0.028483,-0.002579,-0.150458,-0.145124,-0.000732,-0.006855,2.571859,-0.006076,4.313338,-0.10529,-9.5e-06,1.581655,-0.015907,-0.0,3.977647
zip/10002,10002,12.896079,NY,New York County,New York,76807,40.715762,-73.986258,-0.144769,1.102272,3.980774,1.751827,0.082982,0.474515,0.949866,0.379872,1.394939,0.239189,0.831812,0.231631,1.985357,2.112055,3.209443,3.651924,1.369114,0.449409,2.278484,0.187911,0.342743,0.17803,5.85874,2.435344,-0.011125,0.203172,3.586207,2.016004,0.292903,4.168702,2.146975,1.026136,-0.08515,4.931133,1.66473,2.57237,0.716705,2.225487,2.063185,0.276354,0.272357,3.37065,...,0.361146,-0.096761,-0.146408,-0.155304,-0.074113,-0.015054,1.665584,-1.102e-05,-0.002935,-0.169603,0.145326,-0.025443,-0.0,-5e-08,-0.16151,-0.153942,-0.16641,6.376185,1.163428,1.138928,5.270078,-0.159924,1.996964,-4.1e-05,-0.020128,-0.05064,8.771538,2.0861,8.602596,8.060193,8.714463,-0.004688,0.136199,0.513982,5.643821,-0.00235,-0.008118,-0.089103,-0.156498,-0.034799,-0.103022,2.710451,-2.1e-05,4.655519,-0.159014,-0.0,5.388407,-0.112462,-0.0,4.431747
zip/10003,10003,12.829412,NY,New York County,New York,54447,40.731829,-73.989181,-0.131461,1.132039,3.51219,1.1768,0.161716,0.45937,1.291074,0.475496,1.14647,0.199002,0.968685,0.249058,1.788139,2.295915,3.039128,3.72678,1.822461,0.546915,1.955472,0.18888,0.19817,0.138593,5.482426,2.644991,-0.030993,0.25675,3.747168,2.160955,0.311789,4.131626,2.050255,0.924002,-0.077789,4.401683,1.490332,2.379128,0.871206,2.15197,1.929199,0.361298,0.22965,3.038753,...,0.461322,-0.141985,-0.085863,-0.156253,-0.005502,-0.046846,1.59613,-4.81e-06,-0.089342,-0.09954,0.272569,-0.009312,-0.0,-0.0,0.092946,-0.169363,-0.042557,6.376621,0.606484,0.778049,5.227136,-0.169888,2.206395,-0.000619,-0.166381,-0.167345,8.366828,2.290446,7.762557,7.947579,6.410265,-0.020947,0.935508,0.415192,5.355077,-0.027503,-0.000485,-0.123136,0.030198,-0.001412,-0.004739,2.824203,-0.000298,4.061482,-0.068253,-1.35e-06,4.039156,-0.156848,-0.0,5.094444
zip/10004,10004,13.851765,NY,New York County,New York,4795,40.68863,-74.018244,-0.147625,0.546787,3.229492,1.886864,0.108621,0.60301,1.442713,0.47115,1.028882,0.41173,0.708382,0.346837,1.798658,1.940426,3.435431,2.693004,1.95638,0.300429,2.173964,0.440953,0.339811,0.275773,5.271566,2.132799,-0.018342,0.398195,3.207484,1.488841,0.526957,4.330931,1.80641,1.111506,-0.118441,4.121751,1.604998,1.996196,0.674802,1.756082,2.741566,0.211858,0.289891,2.604892,...,0.393506,-0.142817,-0.135064,-0.018993,-0.057043,-0.13163,0.758817,-0.0,-7e-06,-0.134696,0.017068,-0.007268,-0.0,-3.08e-06,-0.152431,-0.105453,-0.141028,1.679086,0.534155,1.851612,5.175022,1.438296,2.622611,-9e-05,-0.004313,-0.119687,2.166597,3.767899,8.035578,4.923587,6.152561,-0.007616,-0.072199,0.052521,3.980608,-6.9e-05,-0.000103,-0.027072,0.367801,-0.003337,-0.010422,1.127201,-7.9e-05,3.496159,-0.079587,-8e-08,6.799802,-0.078682,-0.0,4.140815
zip/10005,10005,13.73,NY,New York County,New York,8637,40.705974,-74.008768,-0.142861,0.778521,3.640563,1.774548,0.10314,0.462198,1.238112,0.362621,1.067066,0.240392,1.012144,0.450193,2.065349,2.07618,3.030134,3.465373,1.88894,0.374023,2.323722,0.307162,0.243969,0.205338,5.640188,2.588103,-0.015375,0.324594,3.737864,1.677935,0.379659,4.378235,2.042296,1.018322,-0.109311,4.715398,1.757364,2.500815,0.888996,1.959577,2.203878,0.227336,0.243604,3.12481,...,-0.11729,-0.084119,-0.169264,-0.115257,-0.010591,-0.026248,1.397036,-0.0,-0.000242,-0.167469,0.550521,-0.013257,-0.0,-3.5e-07,-0.156989,0.212693,-0.13322,3.382343,0.718064,0.732648,5.014384,0.157389,2.529759,-0.000866,-0.002382,-0.072132,3.90559,2.831104,8.165786,6.136455,6.366154,-0.007492,0.567445,-0.101746,3.834963,-0.009529,-0.000505,0.028645,1.454115,-0.001952,-0.011845,2.38504,-1.7e-05,3.45434,-0.159854,-1.32e-06,7.295258,-0.169108,-0.0,3.934241


In [18]:
data.shape

(29298, 338)

In [19]:
embedding_features = [f"feature{x}" for x in range(330)]
label = "pm25"

In [20]:
data = data.dropna(subset=[label])

# Split Train and Test Data

In [21]:

data = data[embedding_features + [label]]
X = data[embedding_features]
y = data[label]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit K-Nearest Neighbors Model

In [22]:
k = 5
model = KNeighborsRegressor(n_neighbors=k)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

In [23]:

evaluation_df = pd.DataFrame({"y": y_test, "y_pred": y_pred})
# Evaluate the model
metrics = evaluate_model(evaluation_df)
print(metrics)

{'r2': 0.9423382724431623, 'r': 0.9708871328512507, 'rmse': 0.6494730499555952, 'mae': 0.4204252179218217, 'mape': 0.05619889943563576}


# Evaluate K-Nearest Neighbors Model

In [28]:
xy_lim = (0, 30)
plot_actual_vs_predicted(
    evaluation_df,
    xlim=xy_lim,
    ylim=xy_lim,
    title="Actual vs Predicted PM2.5",
    x_label="Actual PM2.5",
    y_label="Predicted PM2.5",
)

# Fit Random Forest Regressor model

In [29]:
model = RandomForestRegressor(n_estimators=10,verbose=10,n_jobs=-1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.


building tree 1 of 10building tree 2 of 10



[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   20.0s


building tree 3 of 10
building tree 4 of 10
building tree 5 of 10


[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   38.8s


building tree 6 of 10
building tree 7 of 10
building tree 8 of 10
building tree 9 of 10
building tree 10 of 10


[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.6min finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done   4 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done  10 out of  10 | elapsed:    0.0s finished


In [30]:
evaluation_df = pd.DataFrame({"y": y_test, "y_pred": y_pred})
# Evaluate the model
metrics = evaluate_model(evaluation_df)
print(metrics)

{'r2': 0.9086928793143653, 'r': 0.9539294211355118, 'rmse': 0.817277670967286, 'mae': 0.568816793219495, 'mape': 0.07572915522396176}


# Evaluate Random Forest Model

In [31]:
xy_lim = (0, 30)
plot_actual_vs_predicted(
    evaluation_df,
    xlim=xy_lim,
    ylim=xy_lim,
    title="Actual vs Predicted PM2.5",
    x_label="Actual PM2.5",
    y_label="Predicted PM2.5",
)