In [14]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the weather CSV into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv("colorado_weather_data.csv")

# Report the frame's (rows, columns) shape, then the dtype pandas inferred
# for each column so text-typed numeric columns are easy to spot.
frame_shape = data.shape
print("Number of rows and columns in the data:", frame_shape)
column_types = data.dtypes
print("\nColumn datatype are:\n", column_types)

Number of rows and columns in the data: (600, 13)

Column datatype are:
 City                      object
Date & Time               object
Temperature              float64
Temp Min                 float64
Temp Max                 float64
Humidity                   int64
Pressure                   int64
Wind Speed               float64
Visibility               float64
Cloud Coverage             int64
Weather Description       object
Rain Volume (last 3h)     object
Snow Volume (last 3h)     object
dtype: object


In [15]:
# 1. Show the distinct raw entries of the two precipitation columns.
# Each pair is (heading to print, column to inspect); the rain column is
# reported first, then the snow column.
for heading, column_name in (
    ("Unique values in 'Rain Volume (last 3h)':", "Rain Volume (last 3h)"),
    ("\nUnique values in 'Snow Volume (last 3h)':", "Snow Volume (last 3h)"),
):
    print(heading)
    print(data[column_name].unique())

Unique values in 'Rain Volume (last 3h)':
['No rain' '0.12']

Unique values in 'Snow Volume (last 3h)':
['0.37' 'No snow' '0.18' '0.54' '0.22' '0.11' '1.89' '4.41' '0.6' '0.15'
 '0.13' '0.14' '1.56' '3.66' '6.78' '0.2' '0.23' '0.17' '0.16' '0.25'
 '0.21' '0.45' '0.43' '0.19' '0.33' '0.39']


In [16]:
# 2. Derive float versions of the precipitation columns.
# Each entry is (raw column, sentinel text meaning "nothing fell", new column).
# The sentinel is swapped for 0 and the remaining numeric strings are cast to
# float; the raw columns are left in place next to the cleaned ones.
for raw_column, sentinel, cleaned_column in (
    ("Rain Volume (last 3h)", "No rain", "Rain Volume (last 3h) cleaned"),
    ("Snow Volume (last 3h)", "No snow", "Snow Volume (last 3h) cleaned"),
):
    without_sentinel = data[raw_column].replace(sentinel, 0)
    data[cleaned_column] = without_sentinel.astype(float)

# Last expression of the cell: preview the first rows, including the new columns.
data.head()

Unnamed: 0,City,Date & Time,Temperature,Temp Min,Temp Max,Humidity,Pressure,Wind Speed,Visibility,Cloud Coverage,Weather Description,Rain Volume (last 3h),Snow Volume (last 3h),Rain Volume (last 3h) cleaned,Snow Volume (last 3h) cleaned
0,Boulder,1/26/2025 6:00,-12.64,-13.85,-12.64,89,1034,2.49,9315.0,100,Light snow,No rain,0.37,0.0,0.37
1,Boulder,1/26/2025 9:00,-12.86,-13.28,-12.86,88,1032,3.0,9482.0,89,Overcast clouds,No rain,No snow,0.0,0.0
2,Boulder,1/26/2025 12:00,-13.11,-13.11,-13.11,87,1030,3.23,10000.0,71,Broken clouds,No rain,No snow,0.0,0.0
3,Boulder,1/26/2025 15:00,-10.98,-10.98,-10.98,81,1029,3.02,10000.0,44,Scattered clouds,No rain,No snow,0.0,0.0
4,Boulder,1/26/2025 18:00,-5.4,-5.4,-5.4,82,1026,0.69,10000.0,37,Scattered clouds,No rain,No snow,0.0,0.0


In [17]:
# 3. Count missing entries per column before any imputation is applied.
print("\nNull values in each column before filling:")
missing_per_column = data.isna().sum()  # isna() is the same check as isnull()
print(missing_per_column)


Null values in each column before filling:
City                             0
Date & Time                      0
Temperature                      0
Temp Min                         0
Temp Max                         0
Humidity                         0
Pressure                         0
Wind Speed                       0
Visibility                       7
Cloud Coverage                   0
Weather Description              0
Rain Volume (last 3h)            0
Snow Volume (last 3h)            0
Rain Volume (last 3h) cleaned    0
Snow Volume (last 3h) cleaned    0
dtype: int64


In [18]:
# 4. Fill null values: replace missing "Visibility" entries with the column mean.
#
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on data["Visibility"]. That chained in-place form
# acts on an intermediate Series: pandas 2.x emits a FutureWarning for it, and
# with Copy-on-Write (the default from pandas 3.0) it no longer updates `data`.
# Assigning back is also safe to re-run: once no nulls remain, fillna is a no-op.
#
# NOTE(review): the recorded describe() output shows Visibility with a minimum
# of 31 and a 25th percentile of 10000, so the median may be a more
# representative fill value. The mean is kept to preserve existing results.
visibility_mean = data["Visibility"].mean()
data["Visibility"] = data["Visibility"].fillna(visibility_mean)

# Print null values after filling to confirm nothing is left missing.
print("\nNull values in each column after filling:")
print(data.isnull().sum())


Null values in each column after filling:
City                             0
Date & Time                      0
Temperature                      0
Temp Min                         0
Temp Max                         0
Humidity                         0
Pressure                         0
Wind Speed                       0
Visibility                       0
Cloud Coverage                   0
Weather Description              0
Rain Volume (last 3h)            0
Snow Volume (last 3h)            0
Rain Volume (last 3h) cleaned    0
Snow Volume (last 3h) cleaned    0
dtype: int64


In [19]:
# 5. Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the frame.
# NOTE(review): the recorded output shows a Humidity maximum of 105; if that
# column is a percentage, values above 100 deserve a validity check.
print("\nStatistics for numeric columns:")
numeric_summary = data.describe()
print(numeric_summary)


Statistics for numeric columns:
       Temperature    Temp Min    Temp Max    Humidity     Pressure  \
count   600.000000  600.000000  600.000000  600.000000   600.000000   
mean     -2.005100   -2.035167   -2.004950   70.518333  1023.733333   
std       4.560056    4.605429    4.559883   15.876390     4.959154   
min     -13.870000  -13.870000  -13.870000   29.000000  1012.000000   
25%      -5.377500   -5.407500   -5.377500   59.000000  1021.000000   
50%      -1.260000   -1.260000   -1.260000   70.000000  1023.000000   
75%       1.420000    1.420000    1.420000   85.000000  1027.000000   
max       6.680000    6.680000    6.680000  105.000000  1036.000000   

       Wind Speed    Visibility  Cloud Coverage  \
count  600.000000    600.000000      600.000000   
mean     1.785767   9573.458685       30.006667   
std      0.981031   1737.481669       37.447659   
min      0.100000     31.000000        0.000000   
25%      1.110000  10000.000000        0.000000   
50%      1.705000  10