In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/satellite-data/validation_data_with_satellite.csv
/kaggle/input/satellite-data/training_data_with_satellite.csv
/kaggle/input/buliding-density/validation_data_with_density.csv
/kaggle/input/buliding-density/training_data_with_density.csv


In [2]:
# Feature Engineering
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Machine Learning
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, make_scorer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor

import optuna

# Combine Datasets

In [3]:
train_density = pd.read_csv("/kaggle/input/buliding-density/training_data_with_density.csv")
train_density.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index,density
0,-73.909167,40.813107,24-07-2021 15:53,1.030289,8
1,-73.909187,40.813045,24-07-2021 15:53,1.030289,8
2,-73.909215,40.812978,24-07-2021 15:53,1.023798,8
3,-73.909242,40.812908,24-07-2021 15:53,1.023798,8
4,-73.909257,40.812845,24-07-2021 15:53,1.021634,8


In [4]:
train_satellite = pd.read_csv("/kaggle/input/satellite-data/training_data_with_satellite.csv")
train_satellite.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index,B01,B02,B03,B04,B05,B06,B07,B08,B8A,B11,B12,NDVI,NDBI,NDWI,LST
0,-73.909167,40.813107,24-07-2021 15:53,1.030289,825.0,990.0,1144.0,1172.0,1435.0,1688.0,1688.0,1830.0,1688.0,1788.0,1540.0,0.219187,-0.011609,-0.230666,38.393941
1,-73.909187,40.813045,24-07-2021 15:53,1.030289,825.0,990.0,1144.0,1172.0,1435.0,1688.0,1688.0,1830.0,1688.0,1788.0,1540.0,0.219187,-0.011609,-0.230666,38.393941
2,-73.909215,40.812978,24-07-2021 15:53,1.023798,825.0,622.0,785.0,744.0,1114.0,2006.0,2138.0,2318.0,2353.0,1566.0,1170.0,0.514043,-0.193615,-0.494038,37.785534
3,-73.909242,40.812908,24-07-2021 15:53,1.023798,825.0,619.0,750.0,738.0,1114.0,2006.0,2138.0,2302.0,2353.0,1566.0,1170.0,0.514474,-0.190279,-0.508519,37.785534
4,-73.909257,40.812845,24-07-2021 15:53,1.021634,825.0,556.0,758.0,660.0,1056.0,1891.0,2141.0,2280.0,2259.0,1658.0,1240.0,0.55102,-0.157948,-0.500987,37.358281


In [5]:
train_satellite = train_satellite.drop(["Longitude","Latitude","datetime","UHI Index"], axis=1)
train_concat = pd.concat([train_density, train_satellite], axis = 1)
train_concat.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index,density,B01,B02,B03,B04,B05,B06,B07,B08,B8A,B11,B12,NDVI,NDBI,NDWI,LST
0,-73.909167,40.813107,24-07-2021 15:53,1.030289,8,825.0,990.0,1144.0,1172.0,1435.0,1688.0,1688.0,1830.0,1688.0,1788.0,1540.0,0.219187,-0.011609,-0.230666,38.393941
1,-73.909187,40.813045,24-07-2021 15:53,1.030289,8,825.0,990.0,1144.0,1172.0,1435.0,1688.0,1688.0,1830.0,1688.0,1788.0,1540.0,0.219187,-0.011609,-0.230666,38.393941
2,-73.909215,40.812978,24-07-2021 15:53,1.023798,8,825.0,622.0,785.0,744.0,1114.0,2006.0,2138.0,2318.0,2353.0,1566.0,1170.0,0.514043,-0.193615,-0.494038,37.785534
3,-73.909242,40.812908,24-07-2021 15:53,1.023798,8,825.0,619.0,750.0,738.0,1114.0,2006.0,2138.0,2302.0,2353.0,1566.0,1170.0,0.514474,-0.190279,-0.508519,37.785534
4,-73.909257,40.812845,24-07-2021 15:53,1.021634,8,825.0,556.0,758.0,660.0,1056.0,1891.0,2141.0,2280.0,2259.0,1658.0,1240.0,0.55102,-0.157948,-0.500987,37.358281


In [6]:
validation_density = pd.read_csv("/kaggle/input/buliding-density/validation_data_with_density.csv")
validation_density.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Longitude,Latitude,UHI Index,density
0,-73.971665,40.788763,,12
1,-73.971928,40.788875,,12
2,-73.96708,40.78908,,4
3,-73.97255,40.789082,,12
4,-73.969697,40.787953,,9


In [7]:
validation_satellite = pd.read_csv("/kaggle/input/satellite-data/validation_data_with_satellite.csv")
validation_satellite.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Longitude,Latitude,UHI Index,B01,B02,B03,B04,B05,B06,B07,B08,B8A,B11,B12,NDVI,NDBI,NDWI,LST
0,-73.971665,40.788763,,794.0,511.0,568.0,527.0,1112.0,2323.0,2649.0,2360.0,2784.0,1741.0,1218.0,0.634915,-0.150939,-0.612022,36.20299
1,-73.971928,40.788875,,1221.0,494.0,661.0,497.0,1315.0,2652.0,2964.0,3662.0,3102.0,2124.0,1541.0,0.761,-0.265814,-0.694194,36.20299
2,-73.96708,40.78908,,1049.0,841.0,968.0,1056.0,1028.0,1418.0,1610.0,1402.0,1636.0,1634.0,1198.0,0.140765,0.076416,-0.183122,36.069687
3,-73.97255,40.789082,,1189.0,903.0,984.0,1108.0,1971.0,1978.0,1970.0,1478.0,2049.0,2303.0,2219.0,0.143078,0.218196,-0.20065,36.886594
4,-73.969697,40.787953,,1018.0,716.0,1046.0,917.0,1286.0,2382.0,2778.0,3038.0,2912.0,2102.0,1705.0,0.536283,-0.182101,-0.487757,34.500816


In [8]:
validation_satellite = validation_satellite.drop(["Longitude","Latitude","UHI Index"], axis=1)
validation_concat = pd.concat([validation_density, validation_satellite], axis=1)
validation_concat.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Longitude,Latitude,UHI Index,density,B01,B02,B03,B04,B05,B06,B07,B08,B8A,B11,B12,NDVI,NDBI,NDWI,LST
0,-73.971665,40.788763,,12,794.0,511.0,568.0,527.0,1112.0,2323.0,2649.0,2360.0,2784.0,1741.0,1218.0,0.634915,-0.150939,-0.612022,36.20299
1,-73.971928,40.788875,,12,1221.0,494.0,661.0,497.0,1315.0,2652.0,2964.0,3662.0,3102.0,2124.0,1541.0,0.761,-0.265814,-0.694194,36.20299
2,-73.96708,40.78908,,4,1049.0,841.0,968.0,1056.0,1028.0,1418.0,1610.0,1402.0,1636.0,1634.0,1198.0,0.140765,0.076416,-0.183122,36.069687
3,-73.97255,40.789082,,12,1189.0,903.0,984.0,1108.0,1971.0,1978.0,1970.0,1478.0,2049.0,2303.0,2219.0,0.143078,0.218196,-0.20065,36.886594
4,-73.969697,40.787953,,9,1018.0,716.0,1046.0,917.0,1286.0,2382.0,2778.0,3038.0,2912.0,2102.0,1705.0,0.536283,-0.182101,-0.487757,34.500816


# Add features

## 1. Enhanced Vegetation Index (EVI)
The EVI is used to measure vegetation greenness and is an improvement over NDVI, as it is less sensitive to atmospheric conditions and soil background.

$$
\text{EVI} = 2.5 \cdot \frac{(B8 - B4)}{(B8 + 6 \cdot B4 - 7.5 \cdot B2 + 1)}
$$

Where:
- \(B8\) = Near-Infrared (NIR) band (Band 8)
- \(B4\) = Red band (Band 4)
- \(B2\) = Blue band (Band 2)
- The constants are: \( G = 2.5, C_1 = 6, C_2 = 7.5, L = 1 \)

## 2. Soil Adjusted Vegetation Index (SAVI)
The SAVI is an index used to measure vegetation while adjusting for the influence of soil brightness in areas with sparse vegetation.

$$
\text{SAVI} = \frac{(B8 - B4)}{(B8 + B4 + 0.5)} \cdot (1 + 0.5)
$$

Where:
- \(B8\) = Near-Infrared (NIR) band (Band 8)
- \(B4\) = Red band (Band 4)
- The constant \(L = 0.5\) is used for intermediate vegetation cover.

## 3. Modified Normalized Difference Water Index (MNDWI)
The MNDWI is used for detecting water bodies, making it particularly useful for water resource monitoring and flood detection.

$$
\text{MNDWI} = \frac{(B3 - B11)}{(B3 + B11)}
$$

Where:
- \(B3\) = Green band (Band 3)
- \(B11\) = Shortwave Infrared (SWIR1) band (Band 11)

In [9]:
# Calculate EVI
train_concat['EVI'] = 2.5 * (train_concat['B08'] - train_concat['B04']) / (train_concat['B08'] + 6 * train_concat['B04'] - 7.5 * train_concat['B02'] + 1)
validation_concat['EVI'] = 2.5 * (validation_concat['B08'] - validation_concat['B04']) / (validation_concat['B08'] + 6 * validation_concat['B04'] - 7.5 * validation_concat['B02'] + 1)

# Calculate SAVI
train_concat['SAVI'] = (train_concat['B08'] - train_concat['B04']) / (train_concat['B08'] + train_concat['B04'] + 0.5) * (1 + 0.5)
validation_concat['SAVI'] = (validation_concat['B08'] - validation_concat['B04']) / (validation_concat['B08'] + validation_concat['B04'] + 0.5) * (1 + 0.5)

# Calculate MNDWI
train_concat['MNDWI'] = (train_concat['B03'] - train_concat['B11']) / (train_concat['B03'] + train_concat['B11'])
validation_concat['MNDWI'] = (validation_concat['B03'] - validation_concat['B11']) / (validation_concat['B03'] + validation_concat['B11'])


In [10]:
train_concat.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index,density,B01,B02,B03,B04,B05,...,B8A,B11,B12,NDVI,NDBI,NDWI,LST,EVI,SAVI,MNDWI
0,-73.909167,40.813107,24-07-2021 15:53,1.030289,8,825.0,990.0,1144.0,1172.0,1435.0,...,1688.0,1788.0,1540.0,0.219187,-0.011609,-0.230666,38.393941,1.14395,0.328726,-0.219645
1,-73.909187,40.813045,24-07-2021 15:53,1.030289,8,825.0,990.0,1144.0,1172.0,1435.0,...,1688.0,1788.0,1540.0,0.219187,-0.011609,-0.230666,38.393941,1.14395,0.328726,-0.219645
2,-73.909215,40.812978,24-07-2021 15:53,1.023798,8,825.0,622.0,785.0,744.0,1114.0,...,2353.0,1566.0,1170.0,0.514043,-0.193615,-0.494038,37.785534,1.857885,0.770939,-0.332199
3,-73.909242,40.812908,24-07-2021 15:53,1.023798,8,825.0,619.0,750.0,738.0,1114.0,...,2353.0,1566.0,1170.0,0.514474,-0.190279,-0.508519,37.785534,1.872157,0.771584,-0.352332
4,-73.909257,40.812845,24-07-2021 15:53,1.021634,8,825.0,556.0,758.0,660.0,1056.0,...,2259.0,1658.0,1240.0,0.55102,-0.157948,-0.500987,37.358281,1.955577,0.82639,-0.372517


In [11]:
validation_concat.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Longitude,Latitude,UHI Index,density,B01,B02,B03,B04,B05,B06,...,B8A,B11,B12,NDVI,NDBI,NDWI,LST,EVI,SAVI,MNDWI
0,-73.971665,40.788763,,12,794.0,511.0,568.0,527.0,1112.0,2323.0,...,2784.0,1741.0,1218.0,0.634915,-0.150939,-0.612022,36.20299,2.710736,0.952208,-0.508012
1,-73.971928,40.788875,,12,1221.0,494.0,661.0,497.0,1315.0,2652.0,...,3102.0,2124.0,1541.0,0.761,-0.265814,-0.694194,36.20299,2.691327,1.141363,-0.525314
2,-73.96708,40.78908,,4,1049.0,841.0,968.0,1056.0,1028.0,1418.0,...,1636.0,1634.0,1198.0,0.140765,0.076416,-0.183122,36.069687,0.604261,0.211104,-0.255957
3,-73.97255,40.789082,,12,1189.0,903.0,984.0,1108.0,1971.0,1978.0,...,2049.0,2303.0,2219.0,0.143078,0.218196,-0.20065,36.886594,0.682909,0.214576,-0.401278
4,-73.969697,40.787953,,9,1018.0,716.0,1046.0,917.0,1286.0,2382.0,...,2912.0,2102.0,1705.0,0.536283,-0.182101,-0.487757,34.500816,1.672185,0.804323,-0.335451


# Select features 

In [12]:
features = ['B01','NDVI','NDBI','LST','density']
train_df = train_concat[features + ["UHI Index"]]
train_df

Unnamed: 0,B01,NDVI,NDBI,LST,density,UHI Index
0,825.0,0.219187,-0.011609,38.393941,8,1.030289
1,825.0,0.219187,-0.011609,38.393941,8,1.030289
2,825.0,0.514043,-0.193615,37.785534,8,1.023798
3,825.0,0.514474,-0.190279,37.785534,8,1.023798
4,825.0,0.551020,-0.157948,37.358281,8,1.021634
...,...,...,...,...,...,...
11224,432.0,0.727473,-0.241216,30.293234,0,0.972470
11225,432.0,0.766208,-0.303371,30.293234,0,0.972470
11226,432.0,0.766208,-0.303371,30.440209,0,0.981124
11227,432.0,0.766208,-0.303371,30.440209,0,0.981245


# Remove duplicates from training data

In [13]:
# Remove duplicate rows from the DataFrame based on specified columns and keep the first occurrence
for col in features:
    # Check if the value is a numpy array and has more than one dimension
    train_df[col] = train_df[col].apply(lambda x: tuple(x) if isinstance(x, np.ndarray) and x.ndim > 0 else x)

# Now remove duplicates
uhi_data = train_df.drop_duplicates(subset=features, keep='first')
uhi_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df[col] = train_df[col].apply(lambda x: tuple(x) if isinstance(x, np.ndarray) and x.ndim > 0 else x)


Unnamed: 0,B01,NDVI,NDBI,LST,density,UHI Index
0,825.0,0.219187,-0.011609,38.393941,8,1.030289
2,825.0,0.514043,-0.193615,37.785534,8,1.023798
3,825.0,0.514474,-0.190279,37.785534,8,1.023798
4,825.0,0.55102,-0.157948,37.358281,8,1.021634
6,825.0,0.612442,-0.219765,37.358281,8,1.015143


In [14]:
uhi_data.shape

(9054, 6)

In [15]:
# Resetting the index of the dataset
uhi_data=uhi_data.reset_index(drop=True)

In [16]:
uhi_data.isna().sum()

B01          0
NDVI         0
NDBI         0
LST          0
density      0
UHI Index    0
dtype: int64

# Model Building

In [17]:
# Split the data into features (X) and target (y), and then into training and testing sets
X = uhi_data.drop(columns=['UHI Index']).values
y = uhi_data ['UHI Index'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)

**Feature Scaling**

In [18]:
# Scale the training and test data using standardscaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

**Model Training**

XGBoost

In [19]:
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=100),
        'max_depth': trial.suggest_int('max_depth', 2, 100, step=2),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 0.8),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-5, 10),  # L1 regularization
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-5, 10)  # L2 regularization
    }
    
    model = XGBRegressor(objective='reg:squarederror', random_state=42, **params)
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='r2', n_jobs=-1)
    return score.mean()

# Run the optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10000, n_jobs=-1)

# Best hyperparameters
best_params = study.best_params
print("Best Hyperparameters found:", best_params)

# Train the model with best parameters
best_xgb = XGBRegressor(**best_params)
best_xgb.fit(X_train, y_train)

[I 2025-02-09 23:52:25,053] A new study created in memory with name: no-name-bdd45b6e-a20d-4422-bbad-e58530ef824c
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
  'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-5, 10),  # L1 regularization
  'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-5, 10)  # L2 regularization
[I 2025-02-09 23:52:27,678] Trial 0 finished with value: -0.0001716704517693124 and parameters: {'n_estimators': 500, 'max_depth': 24, 'learning_rate': 0.19556013455228163, 'subsample': 0.7182729896802494, 'colsample_bytree': 0.6517074311492479, 'min_child_weight': 1, 'gamma': 0.24696940846860535, 'reg_alpha': 0.01795101922036198, 'reg_lambda': 0.010031641411235913}. Best is trial 0 with value: -0.0001716704517693124.
[I 2025-02-09 23:52:28,081] Trial 3 finished with value: -0.00016423837572827616 and parameters: {'n_estimators': 800, 'max_depth': 12, 'learning_rate': 0.06031045522321825, 'subsample': 0.800922701689023, 'colsample_byt

Best Hyperparameters found: {'n_estimators': 900, 'max_depth': 14, 'learning_rate': 0.01740763789703819, 'subsample': 0.9112880822706334, 'colsample_bytree': 0.8000232690829194, 'min_child_weight': 1, 'gamma': 6.51054424768414e-06, 'reg_alpha': 2.6738612424515154e-05, 'reg_lambda': 0.0017770824060227685}


**Model Evaluation**

In [20]:
# Evaluate the ensemble on the test set
final_r2 = r2_score(y_test, best_xgb.predict(X_test))
print(f"Final R² score: {final_r2:.4f}")

Final R² score: 0.7297


# Submission

In [21]:
validation_concat.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,Longitude,Latitude,UHI Index,density,B01,B02,B03,B04,B05,B06,...,B8A,B11,B12,NDVI,NDBI,NDWI,LST,EVI,SAVI,MNDWI
0,-73.971665,40.788763,,12,794.0,511.0,568.0,527.0,1112.0,2323.0,...,2784.0,1741.0,1218.0,0.634915,-0.150939,-0.612022,36.20299,2.710736,0.952208,-0.508012
1,-73.971928,40.788875,,12,1221.0,494.0,661.0,497.0,1315.0,2652.0,...,3102.0,2124.0,1541.0,0.761,-0.265814,-0.694194,36.20299,2.691327,1.141363,-0.525314
2,-73.96708,40.78908,,4,1049.0,841.0,968.0,1056.0,1028.0,1418.0,...,1636.0,1634.0,1198.0,0.140765,0.076416,-0.183122,36.069687,0.604261,0.211104,-0.255957
3,-73.97255,40.789082,,12,1189.0,903.0,984.0,1108.0,1971.0,1978.0,...,2049.0,2303.0,2219.0,0.143078,0.218196,-0.20065,36.886594,0.682909,0.214576,-0.401278
4,-73.969697,40.787953,,9,1018.0,716.0,1046.0,917.0,1286.0,2382.0,...,2912.0,2102.0,1705.0,0.536283,-0.182101,-0.487757,34.500816,1.672185,0.804323,-0.335451


In [22]:
submission_val_data = validation_concat[features]
submission_val_data.head()

Unnamed: 0,B01,NDVI,NDBI,LST,density
0,794.0,0.634915,-0.150939,36.20299,12
1,1221.0,0.761,-0.265814,36.20299,12
2,1049.0,0.140765,0.076416,36.069687,4
3,1189.0,0.143078,0.218196,36.886594,12
4,1018.0,0.536283,-0.182101,34.500816,9


In [23]:
# Feature Scaling 
submission_val_data = submission_val_data.values
transformed_submission_data = sc.transform(submission_val_data)

In [24]:
#Making predictions
final_predictions = best_xgb.predict(transformed_submission_data)
final_prediction_series = pd.Series(final_predictions)

In [25]:
#Combining the results into dataframe
submission_df = pd.DataFrame({'Longitude':validation_concat['Longitude'].values, 'Latitude':validation_concat['Latitude'].values, 'UHI Index':final_prediction_series.values})

In [26]:
#Displaying the sample submission dataframe
submission_df.head()

Unnamed: 0,Longitude,Latitude,UHI Index
0,-73.971665,40.788763,0.963997
1,-73.971928,40.788875,0.963665
2,-73.96708,40.78908,0.967677
3,-73.97255,40.789082,0.973001
4,-73.969697,40.787953,0.962217


In [27]:
#Dumping the predictions into a csv file.
submission_df.to_csv("submission.csv",index = False)