In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the weather observations; the path is relative to the working directory.
weather_csv_path = "content/weather_classification_data.csv"
data = pd.read_csv(weather_csv_path)

In [3]:
# Print a structural summary of the raw frame: columns, non-null counts,
# dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13200 entries, 0 to 13199
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Temperature           13200 non-null  float64
 1   Humidity              13200 non-null  int64  
 2   Wind Speed            13200 non-null  float64
 3   Precipitation (%)     13200 non-null  float64
 4   Cloud Cover           13200 non-null  object 
 5   Atmospheric Pressure  13200 non-null  float64
 6   UV Index              13200 non-null  int64  
 7   Season                13200 non-null  object 
 8   Visibility (km)       13200 non-null  float64
 9   Location              13200 non-null  object 
 10  Weather Type          13200 non-null  object 
dtypes: float64(5), int64(2), object(4)
memory usage: 1.1+ MB


In [4]:
from sklearn.utils import shuffle

# Shuffle the rows with a fixed seed so that the row order -- and therefore the
# train/test split, the fitted model and the reported accuracy further down --
# is identical on every Restart & Run All. The seed value itself is arbitrary.
# Note: re-running only this cell reshuffles the already shuffled frame.
data = shuffle(data, random_state=42)
data.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
10588,12.0,62,12.5,71.0,overcast,1000.15,2,Winter,4.0,coastal,Rainy
13051,33.0,38,3.5,7.0,clear,1022.17,7,Summer,7.5,inland,Sunny
13195,10.0,74,14.5,71.0,overcast,1003.15,1,Summer,1.0,mountain,Rainy
3211,7.0,62,11.5,93.0,partly cloudy,921.5,14,Autumn,18.5,mountain,Snowy
704,29.0,51,11.5,14.0,partly cloudy,1016.46,3,Winter,5.0,coastal,Cloudy


In [5]:
# START CODE HERE
# Partition the column names by dtype: `object` columns hold the categorical
# text fields, everything else is numeric.
object_columns = list(data.select_dtypes(include='object').columns)
non_object_columns = list(data.select_dtypes(exclude='object').columns)
# END CODE HERE
print(f"Object Columns: {object_columns}\nNon Object Columns: {non_object_columns}")

Object Columns: ['Cloud Cover', 'Season', 'Location', 'Weather Type']
Non Object Columns: ['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)', 'Atmospheric Pressure', 'UV Index', 'Visibility (km)']


In [6]:
# DON'T EDIT THIS CELL
# Checker: both lists are compared with ==, so the column names and their
# order must match the expected lists exactly.
if (object_columns == ['Cloud Cover', 'Season', 'Location', 'Weather Type'] and non_object_columns == ['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)', 'Atmospheric Pressure', 'UV Index', 'Visibility (km)']):
  print(f"\033[32mTest Passed\033[0m")
else:
  print(f"\033[31mTest Failed\033[0m")

[32mTest Passed[0m


In [7]:
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

# START CODE HERE

# Object columns (including the 'Weather Type' target) are mapped to ordinal
# codes; numeric columns are standardised.
transformers = [
    ('obj', OrdinalEncoder(), object_columns),
    ('num', StandardScaler(), non_object_columns),
]
column_transformer = ColumnTransformer(transformers=transformers)

# NOTE(review): the transformer is fit on the complete dataset, i.e. before the
# train/test split performed in a later cell, so the scaling statistics also
# cover the held-out rows. Left unchanged here -- presumably the reference CSV
# used by the checker cell was produced the same way; confirm before altering.
transformed_values = column_transformer.fit_transform(data)

# END CODE HERE

# Column labels follow the transformer order: object columns first, then the
# numeric ones.
all_columns = [*object_columns, *non_object_columns]

# Wrap the transformed array in a labelled DataFrame (fresh default index).
data_scaled_labeled = pd.DataFrame(data=transformed_values, columns=all_columns)

In [8]:
# Checker: compare the transformed frame against a reference CSV.
data_scaled_labeled_check = pd.read_csv("content/data_scaled_labeled_check.csv")
import numpy as np
# Each column is sorted independently (axis=0), which makes the comparison
# insensitive to row order; it therefore checks the per-column values only,
# not how values line up across a row.
data_scaled_labeled_values = np.sort(data_scaled_labeled.values,axis=0)
data_scaled_labeled_check_values = np.sort(data_scaled_labeled_check.values,axis=0)
# allclose tolerates small floating-point differences; NaNs compare as equal.
if np.allclose(data_scaled_labeled_values, data_scaled_labeled_check_values, equal_nan=True):
  print(f"\033[32mTest Passed\033[0m")
else:
  print(f"\033[31mTest Failed\033[0m")

[32mTest Passed[0m


In [9]:
from sklearn.model_selection import train_test_split

# Separate the encoded target column from the feature matrix.
target_column = 'Weather Type'
y = data_scaled_labeled[target_column]
X = data_scaled_labeled.drop(columns=[target_column])

# 90% of the rows go to training, the remainder to the test set; the split
# itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.9, random_state=10
)

In [10]:
# Checker: the feature split must contain 11880 training and 1320 test rows
# (the dataset has 13200 rows in total).
if (len(X_train)==11880 and len(X_test)==1320):
  print(f"\033[32mTest Passed\033[0m")
else:
  print(f"\033[31mTest Failed\033[0m")

# Same size check for the target vectors.
if (len(y_train)==11880 and len(y_test)==1320):
  print(f"\033[32mTest Passed\033[0m")
else:
  print(f"\033[31mTest Failed\033[0m")

[32mTest Passed[0m
[32mTest Passed[0m


In [11]:
from sklearn import svm

# START CODE HERE

# Train a support-vector classifier with default settings on the training
# split, then score it on the held-out split.
model = svm.SVC().fit(X_train, y_train)
score = model.score(X_test, y_test)

# END CODE HERE

# `score` is a fraction; report it as a percentage.
accuracy_pct = score * 100
print("Accuracy:", accuracy_pct)

Accuracy: 92.1969696969697


In [12]:
# Checker: passes only when the held-out score is strictly above 0.90
# (`score` is a fraction, not a percentage).
if (score>0.90):
  print(f"\033[32mTest Passed\033[0m")
else:
  print(f"\033[31mTest Failed\033[0m")

[32mTest Passed[0m


In [13]:
# Take the first five held-out rows as sample model input.
# NOTE(review): the name `input` shadows Python's builtin input(); it is kept
# because the following cell reads this variable by that name.
input = X_test.head(n=5)
input

Unnamed: 0,Cloud Cover,Season,Location,Temperature,Humidity,Wind Speed,Precipitation (%),Atmospheric Pressure,UV Index,Visibility (km)
11923,0.0,1.0,1.0,1.315591,-1.273224,-0.192836,-1.334918,0.6267,0.257813,1.345768
2331,2.0,3.0,1.0,-1.502823,0.955217,1.182293,0.073739,-0.401845,-0.77941,-0.137308
217,3.0,1.0,1.0,-0.582524,-1.916996,0.675666,0.762415,0.607076,-0.520104,0.60423
1760,3.0,1.0,0.0,3.616336,0.261924,1.399419,0.856326,0.440401,1.55434,1.049153
3963,2.0,1.0,2.0,-0.007338,0.162883,-0.409962,0.105042,-0.122261,-1.038715,-0.582231


In [14]:
# Predict the encoded 'Weather Type' code for each sample row. The result is
# stored under its own name so that `y` -- the full target Series used for the
# train/test split -- is not overwritten by this short prediction array.
y_pred = model.predict(input)
y_pred

array([3., 2., 0., 3., 1.])