In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings for cleaner output

In [None]:
# Read the preprocessed historical election results into a DataFrame
election_data = pd.read_csv(
    filepath_or_buffer='/content/preprocessed_election_filtered.csv'
)

In [None]:
# Per-party average sentiment scores, one (party, score) record per row
sentiment_data = pd.DataFrame(
    [
        ('bjp', 0.009492),
        ('shiv sena', -0.005274),
        ('shiv sena ubt', -0.004646),
        ('congress', 0.026280),
        ('ncp', 0.004637),
    ],
    columns=['Party', 'Average_Sentiment'],
)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, accuracy_score

class MaharashtraElectionPredictor:
    """
    Predict assembly seat outcomes from historical election results.

    The input frame must carry the columns this class reads: 'Year',
    'Constituency_Name', 'Constituency_Type', 'Party', 'Position'
    (1 marks the winning candidate) and 'Incumbent'.
    """

    # Engineered feature -> columns that identify the group the rate is computed
    # over. Dict order is the column order of the model's feature matrix.
    FEATURE_KEYS = {
        'Const_Victory_Rate': ['Constituency_Name', 'Party'],
        'Recent_Victory_Rate': ['Constituency_Name', 'Party'],
        'Type_Victory_Rate': ['Constituency_Type', 'Party'],
        'Incumbent_Success': ['Constituency_Name'],
    }

    def __init__(self, historical_data, base_year=2019, total_seats=288, training_window=15):
        """
        Initialize with historical election data
        @param historical_data: DataFrame containing election data
        @param base_year: Base year for prediction (default 2019)
        @param total_seats: Number of assembly seats (default 288, Maharashtra)
        @param training_window: Number of years before base_year used for training
        """
        self.total_seats = total_seats
        self.base_year = base_year
        self.training_window = training_window

        self.preprocessed_data = None
        self.model = None
        self.feature_columns = None
        self.label_encoder = None

        # Print data overview
        years = sorted(historical_data['Year'].unique())
        print(f"Available election years: {years}")
        print(f"Total records: {len(historical_data)}")
        print(f"Using base year: {self.base_year}")

        # Training rows: strictly before the base year, within the window
        year = historical_data['Year']
        in_window = (year < self.base_year) & (year >= self.base_year - self.training_window)
        self.training_data = historical_data[in_window].copy()

        # Base-year rows are the ones scored by predict_elections
        self.current_data = historical_data[year == self.base_year].copy()

        # Validate constituency count
        self.validate_data()

    def validate_data(self):
        """
        Print completeness checks for the base-year data: constituency count
        versus total_seats, constituencies with a Position == 1 row, and the
        party distribution of those winners.
        """
        current_constituencies = self.current_data['Constituency_Name'].nunique()
        print(f"\nData Validation for {self.base_year}:")
        print(f"Total Constituencies found: {current_constituencies}")
        if current_constituencies != self.total_seats:
            print(f"WARNING: Expected {self.total_seats} constituencies, found {current_constituencies}")

        winners = self.current_data[self.current_data['Position'] == 1]

        # Check constituency coverage
        constituencies_with_winners = winners['Constituency_Name'].nunique()
        print(f"Constituencies with confirmed winners: {constituencies_with_winners}")

        # Check party distribution
        party_distribution = winners['Party'].value_counts()
        print("\nWinning party distribution in base year:")
        print(party_distribution)

    def create_advanced_features(self, df):
        """
        Add the engineered rate columns listed in FEATURE_KEYS to df.

        Every rate is a group-level mean broadcast back onto each row of the
        group, so the result always lines up with df's own rows.
        @param df: DataFrame with the columns named in the class docstring;
                   it is modified in place
        @return: the same DataFrame, with the four feature columns added
        """
        won = (df['Position'] == 1).astype(float)
        by_seat_and_party = [df['Constituency_Name'], df['Party']]

        # Historical performance by constituency
        df['Const_Victory_Rate'] = won.groupby(by_seat_and_party).transform('mean').fillna(0)

        # Recent performance (last 2 elections): rank rows by Year within each
        # (constituency, party) group, blank out everything older than the two
        # newest rows, then average what is left. Ties on Year keep row order.
        recency_rank = df.groupby(['Constituency_Name', 'Party'])['Year'].rank(
            method='first', ascending=False
        )
        recent_won = won.where(recency_rank <= 2)
        df['Recent_Victory_Rate'] = recent_won.groupby(by_seat_and_party).transform('mean').fillna(0)

        # Party strength in constituency type
        df['Type_Victory_Rate'] = won.groupby(
            [df['Constituency_Type'], df['Party']]
        ).transform('mean').fillna(0)

        # Incumbent advantage: share of a constituency's rows where an
        # incumbent candidate also won
        incumbent_won = (df['Incumbent'] & (df['Position'] == 1)).astype(float)
        df['Incumbent_Success'] = incumbent_won.groupby(
            df['Constituency_Name']
        ).transform('mean').fillna(0)

        return df

    def preprocess_data(self):
        """
        Build the training matrix and target from the training window.
        @return: (X, y) - X holds the FEATURE_KEYS columns as floats, y is 1
                 where Position == 1 and 0 otherwise
        @raise ValueError: if no rows fall inside the training window
        """
        if self.training_data.empty:
            raise ValueError(
                f"No training rows between {self.base_year - self.training_window} "
                f"and {self.base_year - 1}"
            )

        # NOTE(review): each rate is computed over the same rows it is attached
        # to, so a row's own result contributes to its features. Consider
        # out-of-fold rates if the reported accuracy looks too good.
        featured = self.create_advanced_features(self.training_data.copy())

        self.preprocessed_data = featured
        self.feature_columns = list(self.FEATURE_KEYS)

        X = featured[self.feature_columns].astype(float)
        y = (featured['Position'] == 1).astype(int)
        return X, y

    def preprocess_current_data(self):
        """
        Build the feature matrix for the base-year rows.

        Rates are looked up from the training history by their group keys, so
        base-year results are never used as inputs. Groups with no history get 0.
        @return: DataFrame indexed like current_data, one row per base-year row,
                 with the same columns and order as the training matrix
        """
        if self.preprocessed_data is None:
            self.preprocess_data()

        history = self.preprocessed_data
        current = self.current_data
        features = pd.DataFrame(index=current.index)

        for column in self.feature_columns:
            keys = self.FEATURE_KEYS[column]
            # Each rate is constant within its group, so one row per key suffices;
            # unique keys also guarantee the left merge keeps the row count.
            lookup = history[keys + [column]].drop_duplicates(subset=keys)
            matched = current[keys].merge(lookup, on=keys, how='left')
            features[column] = matched[column].fillna(0).to_numpy()

        return features[self.feature_columns].astype(float)

    def build_model(self):
        """
        Build and train the prediction model (scaled soft-voting ensemble of
        XGBoost and random forest) and print a hold-out classification report.
        @return: the fitted Pipeline, also stored on self.model
        """
        X, y = self.preprocess_data()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Create ensemble model
        model = VotingClassifier(
            estimators=[
                ('xgb', xgb.XGBClassifier(
                    n_estimators=200,
                    learning_rate=0.05,
                    max_depth=4,
                    random_state=42
                )),
                ('rf', RandomForestClassifier(
                    n_estimators=150,
                    max_depth=5,
                    random_state=42
                ))
            ],
            voting='soft'
        )

        # Create pipeline
        self.model = Pipeline([
            ('scaler', StandardScaler()),
            ('classifier', model)
        ])

        # Train and evaluate
        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)

        print("\nModel Performance:")
        print(classification_report(y_test, y_pred))

        return self.model

    def predict_elections(self):
        """
        Predict election results using current year data
        @return: (party_results, constituency_winners) - seats and seat share
                 per party, and the highest-probability row per constituency
        @raise ValueError: if there are no base-year rows, or the model never
                           saw a winning row during training
        """
        if self.current_data.empty:
            raise ValueError(f"No rows found for base year {self.base_year}")

        if self.model is None:
            self.build_model()

        # Preprocess current year data
        current_features = self.preprocess_current_data()

        # Locate the probability column of the "won" class by label rather than
        # assuming it is always the second column
        classes = list(self.model.classes_)
        if 1 not in classes:
            raise ValueError("Model was trained without any winning rows; cannot score win probability")
        probabilities = self.model.predict_proba(current_features)[:, classes.index(1)]

        # Create results DataFrame
        results = pd.DataFrame({
            'Constituency_Name': self.current_data['Constituency_Name'].to_numpy(),
            'Party': self.current_data['Party'].to_numpy(),
            'Win_Probability': probabilities
        })

        # Keep the whole top-probability row per constituency. groupby().first()
        # would take the first non-null value per column and could combine
        # values from different rows.
        ranked = results.dropna(subset=['Constituency_Name']).sort_values(
            ['Constituency_Name', 'Win_Probability'], ascending=[True, False]
        )
        constituency_winners = ranked.drop_duplicates(
            subset='Constituency_Name', keep='first'
        ).reset_index(drop=True)

        # Calculate party-wise seats
        party_results = constituency_winners['Party'].value_counts().reset_index()
        party_results.columns = ['Party', 'Predicted_Seats']
        party_results['Seat_Share_Percentage'] = (
            party_results['Predicted_Seats'] / self.total_seats * 100
        ).round(2)

        return party_results, constituency_winners

In [None]:
def analyze_results(party_results, total_seats=288):
    """
    Print a seat summary, the majority verdict and two-party coalition options.

    @param party_results: DataFrame with 'Party', 'Predicted_Seats' and
                          'Seat_Share_Percentage' columns; rows may be in any order
    @param total_seats: Number of assembly seats, used for the majority mark
    @return: None (the report is written to stdout)
    """
    majority_mark = total_seats // 2 + 1

    print("\n=== Election Prediction Results ===")
    print(f"Total Assembly Seats: {total_seats}")

    if party_results.empty:
        # Nothing to rank; report it instead of failing on the leader lookup
        print("\nNo party results available to analyze.")
        return

    # Rank by seats so the leader really is the first row. The stable sort keeps
    # the incoming order for ties, so already-sorted input prints unchanged.
    ranked = party_results.sort_values('Predicted_Seats', ascending=False, kind='mergesort')

    print("\nParty-wise Seat Prediction:")
    print(ranked.to_string(index=False))

    print(f"\nMajority Required: {majority_mark} seats")

    winner = ranked.iloc[0]
    print(f"Predicted Leading Party: {winner['Party']}")
    print(f"Predicted Seats: {winner['Predicted_Seats']} ({winner['Seat_Share_Percentage']}%)")

    if winner['Predicted_Seats'] >= majority_mark:
        print("Prediction: Clear Majority")
    else:
        print("Prediction: Hung Assembly / Coalition Needed")
        print("\nPossible Coalition Scenarios:")
        # Pair the leader with every other party in turn
        for _, party in ranked.iloc[1:].iterrows():
            combined_seats = winner['Predicted_Seats'] + party['Predicted_Seats']
            print(f"{winner['Party']} + {party['Party']}: {combined_seats} seats "
                  f"({(combined_seats/total_seats*100):.2f}%)")

In [None]:
def run_fixed_prediction(election_data, sentiment_data=None, total_seats=288):
    """
    Run the seat prediction and return the party-level results.

    @param election_data: DataFrame of historical election results
    @param sentiment_data: Accepted for backward compatibility but not used;
                           MaharashtraElectionPredictor.predict_elections takes
                           no sentiment input
    @param total_seats: Number of assembly seats used for the seat-share column
    @return: DataFrame of predicted seats per party (the first element returned
             by predict_elections)
    """
    # The previous body referenced FixedElectionPredictor, which is not defined
    # in this notebook; MaharashtraElectionPredictor is the available predictor.
    predictor = MaharashtraElectionPredictor(election_data)
    # Set after construction: the constructor's second positional argument is
    # base_year, not the seat count.
    predictor.total_seats = total_seats

    party_results, _constituency_winners = predictor.predict_elections()
    return party_results

In [None]:
# Execution code
def execute_fixed_analysis(data_path='/content/preprocessed_election_filtered.csv',
                           output_path='maharashtra_election_prediction.csv'):
    """
    Load the election data, run the prediction and save the results as CSV.

    @param data_path: CSV file with the preprocessed historical election data
    @param output_path: Destination CSV for the prediction results
    @return: whatever run_fixed_prediction returns (it must provide to_csv)
    @raise Exception: any failure is reported on stdout and then re-raised
    """
    try:
        print("Loading data...")
        election_data = pd.read_csv(data_path)

        sentiment_data = pd.DataFrame({
            'Party': ['BJP', 'Shiv Sena', 'Congress', 'NCP', 'Independent'],
            'Average_Sentiment': [0.009492, -0.005274, 0.026280, 0.004637, 0.0]
        })

        print("\nInitial Data Statistics:")
        print(f"Years covered: {election_data['Year'].unique()}")
        print(f"Total historical records: {len(election_data)}")
        print(f"Unique constituencies: {election_data['Constituency_Name'].nunique()}")

        results = run_fixed_prediction(election_data, sentiment_data)

        # Save results
        results.to_csv(output_path, index=False)
        print(f"\nResults saved to '{output_path}'")

        return results

    except Exception as e:
        # Report the failure in the cell output, then let it propagate
        print(f"\nAn error occurred: {e}")
        raise


In [None]:
if __name__ == "__main__":
    # Print the two-line banner, then run the full analysis
    print(
        "Starting Maharashtra Assembly Election Prediction...",
        "================================================",
        sep="\n",
    )
    results = execute_fixed_analysis()

Starting Maharashtra Assembly Election Prediction...
Loading data...

Initial Data Statistics:
Years covered: [2019 2014 2015 2016 2009 2012 2013 2004 2005 2006 1999 2000 2001 2002
 2003 1995 1996 1997 1998 1990 1985 1980 1978 1972 1967 1962]
Total historical records: 11025
Unique constituencies: 547

An error occurred: name 'FixedElectionPredictor' is not defined


NameError: name 'FixedElectionPredictor' is not defined