In [28]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

class EarthquakePredictor:
    def __init__(self, data_path):
        self.earthquake_data = pd.read_csv(data_path)
        self.features = ['Latitude', 'Longitude']
        self.target = 'Magnitude'
        self.X_train, self.X_test, self.y_train, self.y_test = self._prepare_data()
        self.trained_regressor = None

    def _prepare_data(self):
        X = self.earthquake_data[self.features].to_numpy()
        y = self.earthquake_data[self.target].to_numpy()
        return train_test_split(X, y, test_size=0.2, random_state=12345)

    def train_linear_regression(self):
        regressor = LinearRegression()
        regressor.fit(self.X_train, self.y_train)
        self.trained_regressor = regressor

    def train_random_forest(self, n_estimators, random_state, model_name):
        regressor = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
        self._train_and_evaluate(regressor, model_name)

    def train_gradient_boosting(self, n_estimators=100, learning_rate=0.1, random_state=0, model_name='Gradient Boosting'):
        regressor = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=learning_rate, random_state=random_state)
        self._train_and_evaluate(regressor, model_name)

    def predict_magnitude(self, test_data):
        # Make sure your model is trained
        if hasattr(self, 'trained_regressor') and self.trained_regressor is not None:
            predicted_magnitude = self.trained_regressor.predict(test_data)
            return predicted_magnitude
        else:
            print("Model not trained. Please train a model first.")
            return None

    def _train_and_evaluate(self, regressor, model_name):
        regressor.fit(self.X_train, self.y_train)
        predictions = regressor.predict(self.X_test)
        rmse = metrics.mean_squared_error(self.y_test, predictions, squared=False)
        mae = metrics.mean_absolute_error(self.y_test, predictions)
        print(f'{model_name}: RMSE = {rmse}, MAE = {mae}')
        with open(f'{model_name.lower().replace(" ", "_")}_regressor.pickle', 'wb') as handle:
            pickle.dump(regressor, handle)
            

    def compare_models(self):
        model_names = ['Linear Regression', 'Random Forest (Default)', 'Gradient Boosting']
        df_compare_all = pd.DataFrame(columns=model_names)
        for model_name in model_names:
            regressor = self._load_model(model_name)
            if regressor:
                predictions = regressor.predict(self.X_test)
                rmse = metrics.mean_squared_error(self.y_test, predictions, squared=False)
                mae = metrics.mean_absolute_error(self.y_test, predictions)
                df_compare_all[model_name] = [rmse, mae]
            else:
                df_compare_all[model_name] = [None, None]
        df_compare_all.index = ['RMSE', 'MAE']
        return df_compare_all

    def _load_model(self, model_name):
        try:
            with open(f'{model_name.lower().replace(" ", "_")}_regressor.pickle', 'rb') as handle:
                return pickle.load(handle)
        except FileNotFoundError:
            print(f"Model not found: {model_name}")
            return None


    def display_summary_statistics(self):
        print(self.earthquake_data.describe())
    
    def display_whole_tab(self):
        print(self.earthquake_data.head(-40))

    def display_info(self):
        print(self.earthquake_data.info())

    def check_missing_values(self):
        print(self.earthquake_data.isnull().values.any())

    def display_data_distribution(self):
        self.earthquake_data['Magnitude'].plot.kde()

    def remove_not_needed_columns(self, columns_to_remove):
        self.earthquake_data = self.earthquake_data.drop(columns=columns_to_remove, errors='ignore')

    def display_box_plots(self):
        for column in ['Latitude', 'Longitude', 'Depth', 'Magnitude']:
            self.earthquake_data[column].plot(kind='box')
            plt.show()

    def display_correlation_matrix(self):
        numeric_columns = self.earthquake_data.select_dtypes(include=['float64']).columns
        correlations = self.earthquake_data[numeric_columns].corr()

        fig, ax = plt.subplots(figsize=(12, 12))
        colormap = sns.color_palette("BrBG", 12)

        sns.heatmap(correlations,
                    cmap=colormap,
                    annot=True,
                    fmt=".2f",
                    ax=ax)

        ax.set_yticklabels(correlations.columns)
        plt.show()
        
        


IndentationError: unindent does not match any outer indentation level (<tokenize>, line 34)