# Weather Dataset Analysis

In [91]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing Dataset and Checking the Data

In [92]:
# Load the weather observations from the CSV next to this notebook.
csv_path = "Weather Data.csv"
df = pd.read_csv(csv_path)

In [93]:
# Optional one-off inspection snippets for `df`; uncomment a line to run it.
# print(df.shape)
# print(df.index)
# print(df.head())
# print(df.columns)
# print(df.dtypes)
# print(df["Weather"].unique())
# print(df.nunique())
# print(df["Weather"].count())
# print(df["Weather"].value_counts())
# Note: df.info() prints its report itself and returns None, so wrapping it in
# print() emits an extra "None" line.
# print(df.info())

In [94]:
# Peek at the first record to see the column layout.
first_row = df.head(1)
print(first_row)

        Date/Time  Temp_C  Dew_Point_Temp_C  Rel_Hum_%  Wind_Speed_km/h  \
0  1/01/2012 0:00    -1.8              -3.9         86                4   

   Visibility_km  Press_kPa Weather  
0            8.0     101.24     Fog  


---

### 1. Find all the unique "Wind Speed" values in the data.

In [95]:
# Distinct wind-speed readings, in order of first appearance.
wind_speed_values = df["Wind_Speed_km/h"].unique()
print(wind_speed_values)

[ 4  7  6  9 15 13 20 22 19 24 30 35 39 32 33 26 44 43 48 37 28 17 11  0
 83 70 57 46 41 52 50 63 54  2]


---

### 2. Find the number of times when the weather is exactly "Clear".

In [96]:
# Three equivalent ways to count the rows whose weather label is exactly "Clear".

# Approach 1: tabulate every weather label, then look up "Clear".
weather_counts = df["Weather"].value_counts()
# Single quotes inside the replacement field: reusing the outer quote character
# there is a SyntaxError on Python versions before 3.12.
print(f"The number of times the weather was exactly clear is : {weather_counts.loc['Clear']}")

# Approach 2: boolean filtering -- each True counts as 1, so the sum is the row count.
# Equivalent: df["Weather"][df["Weather"] == "Clear"].count()
clear_weather_count = (df["Weather"] == "Clear").sum()
print(clear_weather_count)

# Approach 3: groupby -- pull out the "Clear" group and count its entries.
clear_weather_count = df["Weather"].groupby(df["Weather"]).get_group("Clear").count()
print(clear_weather_count)

The number of times the weather was exactly clear is : 1326
1326
1326


---

### 3. Find the number of times the wind speed was exactly 4km/h.

In [97]:
# Tally how often each wind speed occurs, then look up the entry for 4.
wind_speed_column = df["Wind_Speed_km/h"]
wind_speed_count = wind_speed_column.value_counts()
occurrences_at_4 = wind_speed_count.loc[4]
print(f"The number of times wind speed was 4km/h is : {occurrences_at_4}")

The number of times wind speed was 4km/h is : 474


---

### 4. Find out all Null values in the data

In [98]:
# Per-column tally of missing entries, followed by the tally of present entries.
null_counts = df.isnull().sum()
non_null_counts = df.notnull().sum()
print(null_counts)
print(non_null_counts)
# Result: no column contains a null value.

Date/Time           0
Temp_C              0
Dew_Point_Temp_C    0
Rel_Hum_%           0
Wind_Speed_km/h     0
Visibility_km       0
Press_kPa           0
Weather             0
dtype: int64
Date/Time           8784
Temp_C              8784
Dew_Point_Temp_C    8784
Rel_Hum_%           8784
Wind_Speed_km/h     8784
Visibility_km       8784
Press_kPa           8784
Weather             8784
dtype: int64


---

### 5. Rename the column "Weather" to "Weather Condition"

In [101]:
# Rename the weather label column. `df` is rebound here, so cells above that
# read df["Weather"] will no longer find that column if re-run after this one.
column_renames = {"Weather": "Weather Condition"}
df = df.rename(columns=column_renames)
print(df)

             Date/Time  Temp_C  Dew_Point_Temp_C  Rel_Hum_%  Wind_Speed_km/h  \
0       1/01/2012 0:00    -1.8              -3.9         86                4   
1       1/01/2012 1:00    -1.8              -3.7         87                4   
2       1/01/2012 2:00    -1.8              -3.4         89                7   
3       1/01/2012 3:00    -1.5              -3.2         88                6   
4       1/01/2012 4:00    -1.5              -3.3         88                7   
...                ...     ...               ...        ...              ...   
8779  12/31/2012 19:00     0.1              -2.7         81               30   
8780  12/31/2012 20:00     0.2              -2.4         83               24   
8781  12/31/2012 21:00    -0.5              -1.5         93               28   
8782  12/31/2012 22:00    -0.2              -1.8         89               28   
8783  12/31/2012 23:00     0.0              -2.1         86               30   

      Visibility_km  Press_kPa     Weat

---

### 6. What is the mean of visibility?

In [None]:
# Arithmetic mean of the visibility column.
visibility_km = df["Visibility_km"]
mean_visibility = visibility_km.mean()
print(f"The mean visibility is : {mean_visibility}")

The mean visibility is : 27.664446721311474


---

### 7. What is the standard deviation of "Pressure" in this data?

In [110]:
# Standard deviation of the pressure column (pandas default settings).
pressure_kpa = df["Press_kPa"]
std_pressure = pressure_kpa.std()
print(f"The standard deviation of Pressure is : {std_pressure}")

The standard deviation of Pressure is : 0.8440047459486483


Standard deviation (SD) is a measure of how spread out the numbers in a data set are: it tells you how far, on average, the individual data points stray from the mean.

City A (San Diego style): Temperatures are mostly 24°C, 25°C, and 26°C. The SD is very low. You can trust the average when packing your bags.

City B (Desert style): Temperatures are 45°C during the day and 5°C at night. The SD is very high. The average (25°C) is technically correct but practically useless for deciding what to wear.

A common way to tell whether an SD is small is to look at the Coefficient of Variation (CV), which is the standard deviation divided by the mean: $\text{CV} = \text{std} / \text{mean}$

If CV < 0.1 (less than 10%), the data is usually considered to have low variance (a small SD).

If CV > 0.5 (more than 50%), the data is spread out (a large SD).

---

### 8. What is the variance of Relative Humidity in the data?

In [112]:
# Variance of the relative-humidity column (pandas default settings).
relative_humidity = df["Rel_Hum_%"]
var_relative_humidity = relative_humidity.var()
print(f"Variance of relative humidity is : {var_relative_humidity}")

Variance of relative humidity is : 286.24855019850196


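Variance is the average of the squared differences from the mean. Note that pandas' `.var()` divides by N − 1 rather than N by default (`ddof=1`), so the value above is the sample variance.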

---

### 9. Find all instances when "Snow" was recorded.

In [126]:
# Two equivalent ways to count the rows whose condition is exactly "Snow".
# NOTE(review): the exact match skips compound labels that also mention snow
# (e.g. "Drizzle,Snow") -- confirm whether those should be included.

# Approach 1: tabulate every condition label, then look up "Snow".
weather_condition_counts = df["Weather Condition"].value_counts()
# Single quotes inside the replacement field: reusing the outer quote character
# there is a SyntaxError on Python versions before 3.12.
print(f"The number of times snow was recorded is : {weather_condition_counts.loc['Snow']}")

# Approach 2: boolean filtering -- each True counts as 1, so the sum is the row count.
recorded_snow_count = (df["Weather Condition"] == "Snow").sum()
print(f"The number of times snow was recorded is : {recorded_snow_count}")

The number of times snow was recorded is : 390
The number of times snow was recorded is : 390


---

### 10. Find all instances when the wind speed is above 24 and visibility is 25.

In [128]:
# Build each condition as a named boolean mask, then keep rows matching both.
visibility_is_25 = df["Visibility_km"] == 25
wind_above_24 = df["Wind_Speed_km/h"] > 24
cond_check = df[visibility_is_25 & wind_above_24]
print(cond_check)

             Date/Time  Temp_C  Dew_Point_Temp_C  Rel_Hum_%  Wind_Speed_km/h  \
23     1/01/2012 23:00     5.3               2.0         79               30   
24      1/02/2012 0:00     5.2               1.5         77               35   
25      1/02/2012 1:00     4.6               0.0         72               39   
26      1/02/2012 2:00     3.9              -0.9         71               32   
27      1/02/2012 3:00     3.7              -1.5         69               33   
...                ...     ...               ...        ...              ...   
8705  12/28/2012 17:00    -8.6             -12.0         76               26   
8753  12/30/2012 17:00   -12.1             -15.8         74               28   
8755  12/30/2012 19:00   -13.4             -16.5         77               26   
8759  12/30/2012 23:00   -12.1             -15.1         78               28   
8760   12/31/2012 0:00   -11.1             -14.4         77               26   

      Visibility_km  Press_kPa Weather 

---

### 11. What is the mean value of each column for each weather condition?

In [132]:
# Group rows by their weather condition and average every numeric column.
rows_by_condition = df.groupby(df["Weather Condition"])
rows_by_condition.mean(numeric_only=True)

Unnamed: 0_level_0,Temp_C,Dew_Point_Temp_C,Rel_Hum_%,Wind_Speed_km/h,Visibility_km,Press_kPa
Weather Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Clear,6.825716,0.089367,64.497738,10.557315,30.153243,101.587443
Cloudy,7.970544,2.37581,69.592593,16.127315,26.625752,100.911441
Drizzle,7.353659,5.504878,88.243902,16.097561,17.931707,100.435366
"Drizzle,Fog",8.0675,7.03375,93.275,11.8625,5.2575,100.786625
"Drizzle,Ice Pellets,Fog",0.4,-0.7,92.0,20.0,4.0,100.79
"Drizzle,Snow",1.05,0.15,93.5,14.0,10.5,100.89
"Drizzle,Snow,Fog",0.693333,0.12,95.866667,15.533333,5.513333,99.281333
Fog,4.303333,3.159333,92.286667,7.946667,6.248,101.184067
Freezing Drizzle,-5.657143,-8.0,83.571429,16.571429,9.2,100.202857
"Freezing Drizzle,Fog",-2.533333,-4.183333,88.5,17.0,5.266667,100.441667


---

### 12. Show all records where weather condition is Fog.

In [134]:
# Group rows by weather condition and print the group labelled "Fog".
rows_by_condition = df.groupby(df["Weather Condition"])
fog_records = rows_by_condition.get_group("Fog")
print(fog_records)

             Date/Time  Temp_C  Dew_Point_Temp_C  Rel_Hum_%  Wind_Speed_km/h  \
0       1/01/2012 0:00    -1.8              -3.9         86                4   
1       1/01/2012 1:00    -1.8              -3.7         87                4   
4       1/01/2012 4:00    -1.5              -3.3         88                7   
5       1/01/2012 5:00    -1.4              -3.3         87                9   
6       1/01/2012 6:00    -1.5              -3.1         89                7   
...                ...     ...               ...        ...              ...   
8716   12/29/2012 4:00   -16.0             -17.2         90                6   
8717   12/29/2012 5:00   -14.8             -15.9         91                4   
8718   12/29/2012 6:00   -13.8             -15.3         88                4   
8719   12/29/2012 7:00   -14.8             -16.4         88                7   
8722  12/29/2012 10:00   -12.0             -13.3         90                7   

      Visibility_km  Press_kPa Weather 