# Designed to Clean / Standardize any Dataset into a standard format

In [1021]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import calendar
import string
from string import punctuation
from itertools import chain

from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, f1_score

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_selection import SelectPercentile, chi2, f_regression, f_classif

from sklearn import svm
from sklearn.utils import shuffle

df = pd.read_csv('PortofSpainUncleaned.csv', encoding='latin1')  #load Datafield csv
pd.options.mode.copy_on_write = True 


# Read and Understand the Dataset

In [1023]:
df.shape

(9561, 28)

In [1024]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9561 entries, 0 to 9560
Data columns (total 28 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              9561 non-null   object 
 1   datetime          9561 non-null   object 
 2   tempmax           9561 non-null   float64
 3   tempmin           9561 non-null   float64
 4   temp              9561 non-null   float64
 5   feelslikemax      9561 non-null   float64
 6   feelslikemin      9561 non-null   float64
 7   feelslike         9561 non-null   float64
 8   dew               9561 non-null   float64
 9   humidity          9561 non-null   float64
 10  precip            7765 non-null   float64
 11  precipprob        9561 non-null   int64  
 12  precipcover       9561 non-null   float64
 13  windspeed         9561 non-null   float64
 14  winddir           9531 non-null   float64
 15  sealevelpressure  9279 non-null   float64
 16  cloudcover        9561 non-null   float64


In [1025]:
df.head(10)   #shows the first 10 rows of the CSV to highlight what the data looks like


Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,solarradiation,solarenergy,uvindex,sunrise,sunset,moonphase,conditions,description,icon,stations
0,"Port of Spain, Trinidad, Trinidad And Tobago",01/01/2000,87.9,71.7,80.0,94.4,71.7,82.7,72.0,78.0,...,,,,2000-01-01T06:24:06,2000-01-01T17:54:48,0.83,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
1,"Port of Spain, Trinidad, Trinidad And Tobago",02/01/2000,87.9,71.3,78.2,96.1,71.3,80.5,72.5,83.7,...,,,,2000-01-02T06:24:30,2000-01-02T17:55:20,0.86,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
2,"Port of Spain, Trinidad, Trinidad And Tobago",03/01/2000,89.7,73.5,79.3,95.0,73.5,81.2,71.6,78.6,...,,,,2000-01-03T06:24:53,2000-01-03T17:55:53,0.9,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
3,"Port of Spain, Trinidad, Trinidad And Tobago",04/01/2000,84.3,71.3,76.1,89.8,71.3,77.2,70.9,84.9,...,,,,2000-01-04T06:25:16,2000-01-04T17:56:26,0.93,Partially cloudy,Clearing in the afternoon.,partly-cloudy-day,78970099999
4,"Port of Spain, Trinidad, Trinidad And Tobago",05/01/2000,81.2,73.1,76.1,86.7,73.1,76.5,73.9,93.0,...,,,,2000-01-05T06:25:37,2000-01-05T17:56:59,0.96,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
5,"Port of Spain, Trinidad, Trinidad And Tobago",06/01/2000,87.9,73.5,77.9,96.1,73.5,79.7,73.4,86.7,...,,,,2000-01-06T06:25:58,2000-01-06T17:57:31,0.0,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
6,"Port of Spain, Trinidad, Trinidad And Tobago",07/01/2000,87.5,73.5,77.3,92.9,73.5,78.4,71.5,83.4,...,,,,2000-01-07T06:26:18,2000-01-07T17:58:04,0.03,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
7,"Port of Spain, Trinidad, Trinidad And Tobago",08/01/2000,87.9,72.8,79.7,92.9,72.8,81.6,70.6,75.8,...,,,,2000-01-08T06:26:38,2000-01-08T17:58:36,0.07,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
8,"Port of Spain, Trinidad, Trinidad And Tobago",09/01/2000,87.9,71.7,78.5,95.4,71.7,81.3,72.3,82.3,...,,,,2000-01-09T06:26:56,2000-01-09T17:59:09,0.1,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999
9,"Port of Spain, Trinidad, Trinidad And Tobago",10/01/2000,88.4,73.5,79.6,95.7,73.5,82.0,71.9,78.8,...,,,,2000-01-10T06:27:13,2000-01-10T17:59:41,0.14,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,78970099999


In [1026]:
df.describe()

Unnamed: 0,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precip,precipprob,precipcover,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,solarenergy,uvindex,moonphase
count,9561.0,9561.0,9561.0,9561.0,9561.0,9561.0,9561.0,9561.0,7765.0,9561.0,9561.0,9561.0,9531.0,9279.0,9561.0,9557.0,5942.0,5942.0,5942.0,9561.0
mean,88.817268,74.801286,80.65945,97.287595,74.818649,84.033197,73.351532,79.883035,0.185598,46.449116,5.002838,15.255434,96.213419,1012.591292,56.878988,7.043026,220.542915,19.046634,7.768428,0.48293
std,2.964514,2.320795,1.974303,5.220987,2.382868,3.529676,2.454116,6.149367,0.443075,49.876361,12.878168,5.066213,31.962865,1.533463,18.379914,1.000044,57.524383,4.965082,1.70543,0.28847
min,71.7,37.3,71.0,71.7,37.3,71.0,62.1,57.2,0.0,0.0,0.0,0.0,0.1,1006.0,17.5,2.7,0.0,0.0,0.0,0.0
25%,87.7,73.4,79.3,93.7,73.4,81.4,71.7,75.5,0.0,0.0,0.0,12.8,76.9,1011.6,41.4,6.5,182.7,15.8,7.0,0.25
50%,89.6,75.2,80.7,97.4,75.2,84.0,73.9,79.6,0.012,0.0,0.0,15.0,97.0,1012.7,52.5,6.9,231.3,20.0,8.0,0.48
75%,91.3,76.7,82.2,100.5,76.7,86.6,75.2,84.3,0.161,100.0,4.17,17.2,108.1,1013.7,72.9,7.6,266.5,23.0,9.0,0.75
max,138.1,80.9,86.2,138.1,88.1,96.8,79.1,99.3,9.134,100.0,100.0,162.4,360.0,1017.6,100.0,16.8,328.7,28.2,10.0,0.98


In [1027]:
df.isna()   # returns all Null values, where Null = true.

Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,solarradiation,solarenergy,uvindex,sunrise,sunset,moonphase,conditions,description,icon,stations
0,False,False,False,False,False,False,False,False,False,False,...,True,True,True,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,True,True,True,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,True,True,True,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,True,True,True,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,True,True,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9556,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9557,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9558,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9559,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [1028]:
df.isna().sum()

name                   0
datetime               0
tempmax                0
tempmin                0
temp                   0
feelslikemax           0
feelslikemin           0
feelslike              0
dew                    0
humidity               0
precip              1796
precipprob             0
precipcover            0
windspeed              0
winddir               30
sealevelpressure     282
cloudcover             0
visibility             4
solarradiation      3619
solarenergy         3619
uvindex             3619
sunrise                0
sunset                 0
moonphase              0
conditions             0
description            0
icon                   0
stations               0
dtype: int64

In [1029]:
for col in df.columns:
    print(col,df[col].nunique())

name 1
datetime 9195
tempmax 147
tempmin 110
temp 125
feelslikemax 300
feelslikemin 123
feelslike 220
dew 150
humidity 341
precip 660
precipprob 2
precipcover 25
windspeed 150
winddir 1199
sealevelpressure 102
cloudcover 716
visibility 98
solarradiation 1995
solarenergy 240
uvindex 11
sunrise 9195
sunset 9195
moonphase 96
conditions 6
description 35
icon 5
stations 20


# Basic Data Preprocessing

Sunrise / sunset data convereted to just only contain time in 24 hour format.
Icon / Station / wind direction / sea pressure data was removed as deemed unncessary.
Tempratures converted to C from F.
Onehat Encoding for Condition.
Season Categroy added, converting it to Wet and Dry.
Precip converted from Inches to MM.
name truncated to only location (example port of spain)


In [1031]:
df.apply(pd.isnull).sum()/df.shape[0]

name                0.000000
datetime            0.000000
tempmax             0.000000
tempmin             0.000000
temp                0.000000
feelslikemax        0.000000
feelslikemin        0.000000
feelslike           0.000000
dew                 0.000000
humidity            0.000000
precip              0.187846
precipprob          0.000000
precipcover         0.000000
windspeed           0.000000
winddir             0.003138
sealevelpressure    0.029495
cloudcover          0.000000
visibility          0.000418
solarradiation      0.378517
solarenergy         0.378517
uvindex             0.378517
sunrise             0.000000
sunset              0.000000
moonphase           0.000000
conditions          0.000000
description         0.000000
icon                0.000000
stations            0.000000
dtype: float64

In [1032]:
df["precip"].value_counts() / df.shape[0]

precip
0.000    0.347662
0.039    0.026985
0.079    0.024684
0.012    0.022487
0.004    0.021232
           ...   
1.335    0.000105
1.512    0.000105
2.937    0.000105
1.461    0.000105
0.415    0.000105
Name: count, Length: 660, dtype: float64

In [1033]:
df.drop(columns=['icon'],inplace=True)
df.drop(columns=['stations'],inplace=True)
df.drop(columns=['winddir'],inplace=True)
df.drop(columns=['sealevelpressure'],inplace=True)
df.drop(columns=['description'],inplace=True)
df.drop(columns=['conditions'],inplace=True)
df.drop(columns=['solarradiation'],inplace=True)
df.drop(columns=['solarenergy'],inplace=True)
df.drop(columns=['uvindex'],inplace=True)
df.drop(columns=['moonphase'],inplace=True)
df.drop(columns=['sunrise'],inplace=True)
df.drop(columns=['sunset'],inplace=True)
df.drop(columns=['precipcover'], inplace=True)

df.head(10)

Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precip,precipprob,windspeed,cloudcover,visibility
0,"Port of Spain, Trinidad, Trinidad And Tobago",01/01/2000,87.9,71.7,80.0,94.4,71.7,82.7,72.0,78.0,,0,15.0,43.8,7.6
1,"Port of Spain, Trinidad, Trinidad And Tobago",02/01/2000,87.9,71.3,78.2,96.1,71.3,80.5,72.5,83.7,,0,16.1,57.9,8.2
2,"Port of Spain, Trinidad, Trinidad And Tobago",03/01/2000,89.7,73.5,79.3,95.0,73.5,81.2,71.6,78.6,,0,12.8,73.2,9.5
3,"Port of Spain, Trinidad, Trinidad And Tobago",04/01/2000,84.3,71.3,76.1,89.8,71.3,77.2,70.9,84.9,,0,13.9,77.7,9.7
4,"Port of Spain, Trinidad, Trinidad And Tobago",05/01/2000,81.2,73.1,76.1,86.7,73.1,76.5,73.9,93.0,,0,11.4,76.0,8.2
5,"Port of Spain, Trinidad, Trinidad And Tobago",06/01/2000,87.9,73.5,77.9,96.1,73.5,79.7,73.4,86.7,,0,18.3,80.0,6.8
6,"Port of Spain, Trinidad, Trinidad And Tobago",07/01/2000,87.5,73.5,77.3,92.9,73.5,78.4,71.5,83.4,,0,15.0,64.2,7.4
7,"Port of Spain, Trinidad, Trinidad And Tobago",08/01/2000,87.9,72.8,79.7,92.9,72.8,81.6,70.6,75.8,,0,13.9,42.2,8.4
8,"Port of Spain, Trinidad, Trinidad And Tobago",09/01/2000,87.9,71.7,78.5,95.4,71.7,81.3,72.3,82.3,,0,16.1,51.4,7.1
9,"Port of Spain, Trinidad, Trinidad And Tobago",10/01/2000,88.4,73.5,79.6,95.7,73.5,82.0,71.9,78.8,,0,18.3,42.4,8.5


Convert rain to mm

In [1035]:
def inches_to_mm(inches):
    return inches * 25.4

df['precip'] = df['precip'].apply(lambda x: pd.Series(inches_to_mm(x)))
df.rename(columns={"windspeed": "windspeed mph"})

df.head(10)


Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precip,precipprob,windspeed,cloudcover,visibility
0,"Port of Spain, Trinidad, Trinidad And Tobago",01/01/2000,87.9,71.7,80.0,94.4,71.7,82.7,72.0,78.0,,0,15.0,43.8,7.6
1,"Port of Spain, Trinidad, Trinidad And Tobago",02/01/2000,87.9,71.3,78.2,96.1,71.3,80.5,72.5,83.7,,0,16.1,57.9,8.2
2,"Port of Spain, Trinidad, Trinidad And Tobago",03/01/2000,89.7,73.5,79.3,95.0,73.5,81.2,71.6,78.6,,0,12.8,73.2,9.5
3,"Port of Spain, Trinidad, Trinidad And Tobago",04/01/2000,84.3,71.3,76.1,89.8,71.3,77.2,70.9,84.9,,0,13.9,77.7,9.7
4,"Port of Spain, Trinidad, Trinidad And Tobago",05/01/2000,81.2,73.1,76.1,86.7,73.1,76.5,73.9,93.0,,0,11.4,76.0,8.2
5,"Port of Spain, Trinidad, Trinidad And Tobago",06/01/2000,87.9,73.5,77.9,96.1,73.5,79.7,73.4,86.7,,0,18.3,80.0,6.8
6,"Port of Spain, Trinidad, Trinidad And Tobago",07/01/2000,87.5,73.5,77.3,92.9,73.5,78.4,71.5,83.4,,0,15.0,64.2,7.4
7,"Port of Spain, Trinidad, Trinidad And Tobago",08/01/2000,87.9,72.8,79.7,92.9,72.8,81.6,70.6,75.8,,0,13.9,42.2,8.4
8,"Port of Spain, Trinidad, Trinidad And Tobago",09/01/2000,87.9,71.7,78.5,95.4,71.7,81.3,72.3,82.3,,0,16.1,51.4,7.1
9,"Port of Spain, Trinidad, Trinidad And Tobago",10/01/2000,88.4,73.5,79.6,95.7,73.5,82.0,71.9,78.8,,0,18.3,42.4,8.5


Convert Degrees


In [1037]:
def fahrenheit_to_celsius(fahrenheit):
    celsius = (fahrenheit - 32) * 5 / 9
    return round(celsius, 1)


df[[ 'tempmax c']] = df ['tempmax'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))
df[[ 'tempmin c']] = df ['tempmin'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))
df[[ 'avgtemp c']] = df ['temp'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))

df[[ 'feelslikemax c']] = df ['feelslikemax'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))
df[[ 'feelslikemin c']] = df ['feelslikemin'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))
df[[ 'avgfeelsliketemp c']] = df ['feelslike'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))

df[[ 'dewpoint c']] = df ['dew'].apply(lambda x: pd.Series(fahrenheit_to_celsius(x)))

df.drop(columns=['temp'],inplace=True)
df.drop(columns=['tempmax'],inplace=True)
df.drop(columns=['tempmin'],inplace=True)

df.drop(columns=['feelslike'],inplace=True)
df.drop(columns=['feelslikemax'],inplace=True)
df.drop(columns=['feelslikemin'],inplace=True)
df.drop(columns=['dew'],inplace=True)

df.head(10)

Unnamed: 0,name,datetime,humidity,precip,precipprob,windspeed,cloudcover,visibility,tempmax c,tempmin c,avgtemp c,feelslikemax c,feelslikemin c,avgfeelsliketemp c,dewpoint c
0,"Port of Spain, Trinidad, Trinidad And Tobago",01/01/2000,78.0,,0,15.0,43.8,7.6,31.1,22.1,26.7,34.7,22.1,28.2,22.2
1,"Port of Spain, Trinidad, Trinidad And Tobago",02/01/2000,83.7,,0,16.1,57.9,8.2,31.1,21.8,25.7,35.6,21.8,26.9,22.5
2,"Port of Spain, Trinidad, Trinidad And Tobago",03/01/2000,78.6,,0,12.8,73.2,9.5,32.1,23.1,26.3,35.0,23.1,27.3,22.0
3,"Port of Spain, Trinidad, Trinidad And Tobago",04/01/2000,84.9,,0,13.9,77.7,9.7,29.1,21.8,24.5,32.1,21.8,25.1,21.6
4,"Port of Spain, Trinidad, Trinidad And Tobago",05/01/2000,93.0,,0,11.4,76.0,8.2,27.3,22.8,24.5,30.4,22.8,24.7,23.3
5,"Port of Spain, Trinidad, Trinidad And Tobago",06/01/2000,86.7,,0,18.3,80.0,6.8,31.1,23.1,25.5,35.6,23.1,26.5,23.0
6,"Port of Spain, Trinidad, Trinidad And Tobago",07/01/2000,83.4,,0,15.0,64.2,7.4,30.8,23.1,25.2,33.8,23.1,25.8,21.9
7,"Port of Spain, Trinidad, Trinidad And Tobago",08/01/2000,75.8,,0,13.9,42.2,8.4,31.1,22.7,26.5,33.8,22.7,27.6,21.4
8,"Port of Spain, Trinidad, Trinidad And Tobago",09/01/2000,82.3,,0,16.1,51.4,7.1,31.1,22.1,25.8,35.2,22.1,27.4,22.4
9,"Port of Spain, Trinidad, Trinidad And Tobago",10/01/2000,78.8,,0,18.3,42.4,8.5,31.3,23.1,26.4,35.4,23.1,27.8,22.2


In [1038]:
df = df.fillna(method="ffill")

  df = df.fillna(method="ffill")


In [1039]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9561 entries, 0 to 9560
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   name                9561 non-null   object 
 1   datetime            9561 non-null   object 
 2   humidity            9561 non-null   float64
 3   precip              7765 non-null   float64
 4   precipprob          9561 non-null   int64  
 5   windspeed           9561 non-null   float64
 6   cloudcover          9561 non-null   float64
 7   visibility          9561 non-null   float64
 8   tempmax c           9561 non-null   float64
 9   tempmin c           9561 non-null   float64
 10  avgtemp c           9561 non-null   float64
 11  feelslikemax c      9561 non-null   float64
 12  feelslikemin c      9561 non-null   float64
 13  avgfeelsliketemp c  9561 non-null   float64
 14  dewpoint c          9561 non-null   float64
dtypes: float64(12), int64(1), object(2)
memory usage: 1.1+ 

In [1040]:
for col in df.columns:
    print(col,df[col].nunique())

name 1
datetime 9195
humidity 341
precip 660
precipprob 2
windspeed 150
cloudcover 716
visibility 98
tempmax c 104
tempmin c 76
avgtemp c 72
feelslikemax c 188
feelslikemin c 87
avgfeelsliketemp c 129
dewpoint c 88


In [1041]:
df.isna().sum()

name                     0
datetime                 0
humidity                 0
precip                1796
precipprob               0
windspeed                0
cloudcover               0
visibility               0
tempmax c                0
tempmin c                0
avgtemp c                0
feelslikemax c           0
feelslikemin c           0
avgfeelsliketemp c       0
dewpoint c               0
dtype: int64

# Outlier Temprature Analysis
Max Temp Ever Recorded in Triniad : 37.8
Lowest Temp Ever Recorded in Trinidad : 16.1

https://www.metoffice.gov.tt/Climate#:~:text=The%20coolest%20minimum%20temperature%20is,and%2030th%2C%201964%20at%20Piarco.

Backfill null values with avg for that day to avoid squewing data

In [1044]:
df["datetime"] = pd.to_datetime(df["datetime"], dayfirst=True)
df["day"] = df["datetime"].dt.day
df["month"] = df["datetime"].dt.month
df["year"] = df["datetime"].dt.year

ref_df = df.loc['01/0/2005':'31/12/2025']

day_avg_precip = (ref_df[ref_df['precip'].notna()].groupby(['month', 'day'])['precip'].mean())
missing_mask = (df.year <= 2004) & (df['precip'].isna())
df.loc[missing_mask, 'precip'] = df[missing_mask].apply(lambda row: day_avg_precip.get((row['month'], row['day']), None),axis=1)

In [1045]:
df.drop(columns=['precipprob'], inplace=True)
df['precip_chance'] = (df['precip'] > 0.00).astype(int)

Replace outliers with more than 36C actual temp with average for that day

In [1047]:
day_avg_tempmax = df[df['tempmax c'] <= 37.8].groupby(['month', 'day'])['tempmax c'].mean()

mask = df['tempmax c'] > 37.8

df.loc[mask, 'tempmax c'] = df[mask].apply(
    lambda row: day_avg_tempmax.get((row['month'], row['day']), row['tempmax c']),
    axis=1
)

Replace outlier mintemp < 16.1 with average for that day

In [1049]:
day_avg_tempmin = df[df['tempmin c'] <= 16.1].groupby(['month', 'day'])['tempmin c'].mean()

mask = df['tempmin c'] < 16.1

df.loc[mask, 'tempmin c'] = df[mask].apply(
    lambda row: day_avg_tempmin.get((row['month'], row['day']), row['tempmin c']),
    axis=1
)

# Replace outlier feelsmintemp < 16.1 with average for that day

In [1051]:
day_avg_tempminfeellike = df[df['feelslikemin c'] <= 16.1].groupby(['month', 'day'])['feelslikemin c'].mean()

mask = df['feelslikemin c'] > 16.1

df.loc[mask, 'feelslikemin c'] = df[mask].apply(
    lambda row: day_avg_tempminfeellike.get((row['month'], row['day']), row['feelslikemin c']),
    axis=1
)
df.drop(columns=['month', 'day', 'year'], inplace=True)


In [1052]:
def rename_all_columns():
    full_name = df['name'].iloc[0]
    parts = full_name.split(',')  
    location = (parts[0]) 
    df.drop(columns=['name'],inplace=True)

    new_column_names = {col: f'{location} {col}' for col in df.columns if col != 'datetime'}
    df.rename(columns=new_column_names, inplace=True)


rename_all_columns()
df.head()

Unnamed: 0,datetime,Port of Spain humidity,Port of Spain precip,Port of Spain windspeed,Port of Spain cloudcover,Port of Spain visibility,Port of Spain tempmax c,Port of Spain tempmin c,Port of Spain avgtemp c,Port of Spain feelslikemax c,Port of Spain feelslikemin c,Port of Spain avgfeelsliketemp c,Port of Spain dewpoint c,Port of Spain precip_chance
0,2000-01-01,78.0,9.779,15.0,43.8,7.6,31.1,22.1,26.7,34.7,22.1,28.2,22.2,1
1,2000-01-02,83.7,2.10185,16.1,57.9,8.2,31.1,21.8,25.7,35.6,21.8,26.9,22.5,1
2,2000-01-03,78.6,4.9022,12.8,73.2,9.5,32.1,23.1,26.3,35.0,23.1,27.3,22.0,1
3,2000-01-04,84.9,10.25525,13.9,77.7,9.7,29.1,21.8,24.5,32.1,21.8,25.1,21.6,1
4,2000-01-05,93.0,0.6985,11.4,76.0,8.2,27.3,22.8,24.5,30.4,22.8,24.7,23.3,1


In [1053]:
fname = f'{location}.csv'
df.to_csv(fname, index=False)