# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Step 1: Data Collection (simulating climate data)
# Draw every column from one seeded generator so the synthetic dataset, and
# therefore every downstream statistic and metric, is identical on each run.
RANDOM_SEED = 42
N_SAMPLES = 1000
rng = np.random.default_rng(RANDOM_SEED)

# NOTE: 'target' is sampled independently of the three feature columns, so
# there is no real signal to learn here; an R^2 close to 0 is the expected
# outcome for any model fitted on this data.
data = {
    'temperature': rng.normal(20, 5, N_SAMPLES),  # normal(mean=20, std=5)
    'humidity': rng.normal(50, 10, N_SAMPLES),  # normal(mean=50, std=10)
    'wind_speed': rng.normal(5, 2, N_SAMPLES),  # normal(mean=5, std=2)
    'target': rng.normal(21, 5, N_SAMPLES),  # normal(mean=21, std=5)
}
df = pd.DataFrame(data)

# Step 2: Data Preprocessing
df = df.dropna()  # Handle missing values by dropping rows with any NaNs

# Choose the train/test rows *before* scaling so the scaler's mean and variance
# come from training rows only; fitting it on every row would leak test-set
# statistics into the data the model is later scored on.  Splitting the row
# positions uses the same test_size/random_state as splitting the data itself.
train_idx, test_idx = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)
scaler = StandardScaler()
scaler.fit(df.iloc[train_idx])
normalized_data = pd.DataFrame(scaler.transform(df), columns=df.columns)

# Step 3: Statistical Analysis
# Summarise the raw temperatures.  Standardised values have mean ~0 and
# standard deviation ~1 by construction, so they would make the report
# meaningless.
mean_temp = df['temperature'].mean()
std_temp = df['temperature'].std()

# Step 4: Model Training and Prediction
X = normalized_data.drop('target', axis=1)
y = normalized_data['target']
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

model = SVR()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Model Performance Metrics (computed on the standardised target)
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

# Step 5: Visualization
# Scatter the test-set targets against the model's predictions; the dashed
# diagonal spanning the range of y_test marks where predicted == actual.
fig, ax = plt.subplots(figsize=(10, 6))
lo, hi = y_test.min(), y_test.max()
ax.scatter(y_test, predictions)
ax.plot([lo, hi], [lo, hi], 'k--', lw=2)
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Predicted vs Actual Temperature (SVR)')
plt.show()

# Step 6: Reporting
# Assemble a plain-text summary of the temperature statistics and the model's
# test-set metrics.
report = f"""
Statistical Summary:
Mean Temperature: {mean_temp}
Standard Deviation of Temperature: {std_temp}

Model Performance:
Mean Squared Error: {mse}
R^2 Score: {r2}
"""

# Print explicitly: a bare `report` expression produces no output when this
# file runs as a script, and in a notebook it shows the repr with escaped
# newlines rather than the formatted text.
print(report)
