In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
import joblib
import warnings

# ---------------------------------------------------------------------------
# Training pipeline:
#   load CSV -> impute -> drop IQR outliers -> log1p(target) -> split
#   -> scale -> fit XGBoost -> persist artifacts for the Streamlit app.
#
# Train/serve contract (the Streamlit cell relies on both points):
#   * Features are kept on their RAW scale. The app passes raw widget values
#     straight to ``scaler.transform``, so any extra feature transform applied
#     here (e.g. a skew-driven log1p) would make training and inference
#     disagree. Tree splits depend only on the ordering of feature values, so
#     a monotonic transform of the features gains nothing for this model.
#   * The target is ALWAYS trained as log1p(power-generated), because the app
#     unconditionally applies ``np.expm1`` to the model output.
#
# No global RuntimeWarning filter is installed: it would also hide the
# invalid-value warnings NumPy emits when a transform produces NaN/inf.
# ---------------------------------------------------------------------------

TARGET_COLUMN = "power-generated"
IQR_MULTIPLIER = 1.5  # width of the outlier fences, in units of the IQR


def _drop_iqr_outliers(frame, multiplier=IQR_MULTIPLIER):
    """Repeatedly drop rows lying outside the IQR fences of any numeric column.

    Each pass walks the numeric columns in order; the quartiles of a column are
    computed on the rows that survived the columns processed before it. Passes
    repeat until one of them flags no outlier.

    Args:
        frame: DataFrame to filter (not modified; a filtered frame is returned).
        multiplier: Fence width as a multiple of the inter-quartile range.

    Returns:
        The filtered DataFrame.
    """
    while True:
        flagged = 0
        for col in frame.select_dtypes(include="number").columns:
            values = frame[col]
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
            spread = q3 - q1
            lower = q1 - multiplier * spread
            upper = q3 + multiplier * spread

            flagged += int(((values < lower) | (values > upper)).sum())
            # Keep only in-fence rows (rows that compare False, e.g. NaN, go too).
            frame = frame[(values >= lower) & (values <= upper)]

        if flagged == 0:
            return frame


# Load the CSV file
data = pd.read_csv("/content/sample_data/solarpowergeneration.csv")

# Fill missing numeric values with the column mean
data = data.fillna(data.mean(numeric_only=True))

# Remove all outliers completely until none remain
data = _drop_iqr_outliers(data)
if data.empty:
    raise ValueError("No rows left after outlier removal; cannot train a model.")

# log1p is undefined at -1 and below; fail loudly instead of training on NaN/-inf.
if (data[TARGET_COLUMN] <= -1).any():
    raise ValueError(
        f"'{TARGET_COLUMN}' contains values <= -1; log1p cannot be applied to it."
    )

# Define features (raw scale) and target (log scale, inverted with expm1 at inference)
X = data.drop(columns=[TARGET_COLUMN])
y = np.log1p(data[TARGET_COLUMN])

# Save feature names for consistency during inference
training_columns = X.columns

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Feature scaling: statistics come from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train XGBoost Regressor on the log-scale target
xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X_train_scaled, y_train)

# Save model, scaler, and feature names
joblib.dump(xgb_model, "xgb.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(list(training_columns), "feature_names.pkl")

print("Model, Scaler & Feature Names Saved Successfully!")


Model, Scaler & Feature Names Saved Successfully!


**Streamlit script**

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib

# Restore the three artifacts written by the training cell, in the same order.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files that
# were produced by a trusted training run.
model, scaler, feature_names = (
    joblib.load(artifact_path)
    for artifact_path in ("xgb.pkl", "scaler.pkl", "feature_names.pkl")
)

# Page configuration (issued before any other Streamlit call)
st.set_page_config(
    page_title="Solar Power Prediction",
    page_icon="🔆",
    layout="wide",
)

# Custom theme: gradient app background, dark sidebar, rounded coral buttons.
_PAGE_CSS = """
    <style>
        .stApp {
            background: linear-gradient(to right, #2b5876, #4e4376);
            color: white;
        }
        .stSidebar {
            background-color: #333;
        }
        .stButton>button {
            background-color: #ff7f50;
            color: white;
            border-radius: 10px;
            font-size: 18px;
        }
    </style>
    """
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Main-area heading and instructions
st.title("🔆 Solar Power Generation Prediction")
st.markdown("Provide feature values below to predict power generated from the solar panel.")

# Sidebar heading shown above the input widgets
st.sidebar.header("Input Solar Features")

# Sidebar input form
def get_user_input():
    """Collect one numeric value per model feature from the sidebar.

    A feature whose name contains "temperature" or "humidity" gets a widget
    bounded to [0.0, 100.0] with a default of 25.0; every other feature gets
    an unbounded widget with a default of 1.0. Widgets are created in the
    order of ``feature_names``.

    Returns:
        A one-row DataFrame (row label 0) whose columns follow
        ``feature_names``.
    """
    def _ask(name):
        # Bounded widget for temperature/humidity-like features.
        if "temperature" in name or "humidity" in name:
            return st.sidebar.number_input(f"{name}",
                                           min_value=0.0, max_value=100.0, value=25.0)
        return st.sidebar.number_input(f"{name}", value=1.0)

    answers = {name: _ask(name) for name in feature_names}
    return pd.DataFrame(answers, index=[0], columns=feature_names)

# Read the current sidebar values as a one-row frame.
input_data = get_user_input()

# Scale the data with the scaler loaded from "scaler.pkl"
scaled_input = scaler.transform(input_data)

# Prediction section
st.subheader("Prediction Result")
if st.button("Predict Power"):
    prediction = float(model.predict(scaled_input)[0])
    # expm1 inverts a log1p target transform.
    # NOTE(review): this assumes the model was trained on log1p(power-generated);
    # confirm against the training cell.
    # A slightly negative model output would otherwise be displayed as a negative
    # power (or "-0.00"), which is not a meaningful generation value, so the
    # result is floored at zero.
    predicted_power = max(float(np.expm1(prediction)), 0.0)
    st.success(f"Predicted Power Generated: **{predicted_power:.2f} units**")