In [55]:
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.impute import KNNImputer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from catboost import CatBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

In [67]:
class DataProcessor:
    """Load a CSV file into a pandas DataFrame and preview it.

    Errors are reported with ``print`` rather than raised, so callers must
    check for a ``None`` result / ``None`` ``df`` attribute themselves.
    """

    def __init__(self, path):
        """Remember the CSV location; nothing is read until ``read_data``."""
        self.df = None
        self.path = path

    def read_data(self):
        """Read the CSV at ``self.path`` into ``self.df``.

        Returns:
            The loaded DataFrame, or ``None`` when reading failed (the error
            is printed and ``self.df`` keeps its previous value).
        """
        try:
            self.df = pd.DataFrame(pd.read_csv(self.path))
        except Exception as e:
            print(f"Dosya Okunamadı veya DataFrame'e Dönüşemedi: {e}")
            return None
        return self.df

    def show_data(self, n=5):
        """Print the first ``n`` rows, or a notice if no DataFrame is loaded."""
        if not isinstance(self.df, pd.DataFrame):
            print("Veri yüklenmemiş veya geçersiz veri tipi. DataFrame bekleniyor.")
            return
        print(self.df.head(n))

class Visualization(DataProcessor):
    """Chart helpers that plot columns of the CSV-backed DataFrame.

    Both methods load the data lazily on first use, show the figure with
    ``plt.show()`` and print (rather than raise) any error.
    """

    def __init__(self, path):
        super().__init__(path)

    def lineplot_charter(self, x_axis, y_axis, fig_size=(10,5), x_label="x-axis", y_label="y-axis"):
        """Draw a seaborn line plot of ``y_axis`` against ``x_axis``.

        Args:
            x_axis: Column name used for the x values.
            y_axis: Column name used for the y values.
            fig_size: Figure size passed to ``plt.figure``.
            x_label: Text for the x-axis label.
            y_label: Text for the y-axis label.

        Returns:
            The object returned by ``sns.lineplot``, or ``None`` on failure.
        """
        try:
            if self.df is None:
                self.read_data()
            plt.figure(figsize=fig_size)
            plot = sns.lineplot(data=self.df, x=x_axis, y=y_axis)
            plt.xlabel(xlabel=x_label)
            plt.ylabel(ylabel=y_label)
            plt.show()
            return plot
        except Exception as e:
            print(f"Çizgi Grafiği Oluşamıyor. Hata: {e}")

    def hist_charter(self, x_axis, bins=100, fig_size=(10,5), x_label="x-axis", y_label="y-axis",
                      title='Title', fontdict=None):
        """Draw a seaborn histogram of the ``x_axis`` column.

        Args:
            x_axis: Column name to histogram.
            bins: Number of bins passed to ``sns.histplot``.
            fig_size: Figure size passed to ``plt.figure``.
            x_label: Text for the x-axis label.
            y_label: Text for the y-axis label.
            title: Figure title.
            fontdict: Title font settings; defaults to bold, size 11.

        Returns:
            The object returned by ``sns.histplot``, or ``None`` on failure.
        """
        # Build the default per call: a dict literal in the signature would be
        # one shared object across every invocation.
        if fontdict is None:
            fontdict = {'fontsize': 11, 'fontweight': 'bold'}
        try:
            if self.df is None:
                self.read_data()

            plt.figure(figsize=fig_size)
            plot = sns.histplot(data=self.df, x=x_axis, bins=bins)
            plt.xlabel(xlabel=x_label)
            plt.ylabel(ylabel=y_label)
            plt.title(label=title, fontdict=fontdict)
            plt.show()
            return plot
        except Exception as e:
            print(f"Histogram Grafiği Oluşturulamıyor. Hata: {e}")


class DropData(DataProcessor):
    """Remove columns from the CSV-backed DataFrame in place."""

    def __init__(self, path):
        super().__init__(path)

    def drop_data(self, columns):
        """Drop ``columns`` from ``self.df`` if every one of them exists.

        Args:
            columns: A single column name or a collection of column names.

        Nothing is dropped when at least one requested column is absent; the
        absent names are printed instead. Any other failure is printed too.
        """
        # A bare string is treated as a one-element list of names.
        if isinstance(columns, str):
            columns = [columns]
        try:
            if self.df is None:
                self.read_data()

            absent = [name for name in columns if name not in self.df.columns]
            if not absent:
                self.df.drop(columns=columns, axis=1, inplace=True)
                print(f"{columns} Sütunları Silindi")
                return
            print(f"Çıkartılması İstenilen Sütun Bulunamadı: {absent}")
        except Exception as e:
            print(f"{columns} Sütunları Silinemedi. Hata: {e}")



class FeatureAdd(DataProcessor):
    """Create derived columns by adding or subtracting existing columns.

    Both methods load the data lazily and print (rather than raise) errors.
    """

    def __init__(self, path):
        super().__init__(path)

    def sum_method(self, new_column_name, *columns):
        """Store the row-wise sum of ``columns`` in ``new_column_name``.

        Args:
            new_column_name: Name of the column to create or overwrite.
            *columns: Names of the columns to add together.
        """
        try:
            if self.df is None:
                self.read_data()

            # Verify that every requested column exists before computing.
            for column in columns:
                if column not in self.df.columns:
                    raise ValueError(f"Sütun {column} bulunamadı.")

            # Create the new column.
            self.df[new_column_name] = self.df[list(columns)].sum(axis=1)
            print("Yeni özellik oluşturuldu!")

        except Exception as e:
            print(f"Yeni özellik oluşturulamadı! Hata: {e}")

    def substract_method(self, new_column_name, *columns):
        """Store ``columns[0] - columns[1] - ...`` in ``new_column_name``.

        Args:
            new_column_name: Name of the column to create or overwrite.
            *columns: Column names; the first is the minuend, every following
                column is subtracted from it in order.
        """
        try:
            if self.df is None:
                self.read_data()

            if not columns:
                raise ValueError("En az bir sütun belirtilmelidir.")

            # Abort on the first missing column (same rule as sum_method)
            # instead of printing a notice and failing later with a KeyError.
            for column in columns:
                if column not in self.df.columns:
                    raise ValueError(f"Sütun {column} bulunamadı.")

            # Copy so the in-place subtraction never touches the source column.
            result = self.df[columns[0]].copy()
            for column in columns[1:]:
                result -= self.df[column]

            self.df[new_column_name] = result
            print("Yeni Özellik Oluşturuldu")

        except Exception as e:
            print(f"Yeni Özellik Oluşturulamadı {e}")

class DetectNaN():
    """Report which columns of a train/test CSV pair contain missing values."""

    def __init__(self, train_path, test_path):
        """Remember both CSV locations; nothing is read until needed."""
        self.train_path = train_path
        self.test_path = test_path
        self.train_df = None
        self.test_df = None

    def read_data(self):
        """Load both CSV files into ``train_df`` / ``test_df``.

        Each file is read independently: a failure is printed together with
        its cause and leaves the corresponding attribute unchanged.
        """
        sources = (
            ("train_df", self.train_path, "Train"),
            ("test_df", self.test_path, "Test"),
        )
        for attr, path, label in sources:
            try:
                setattr(self, attr, pd.read_csv(path))
            except Exception as e:
                print(f"{label} verisi okunamadı: {e}")

    @staticmethod
    def _split_by_null_count(frame, remove_threshold):
        """Return ``(to_impute, to_remove)`` column-name lists for ``frame``."""
        null_counts = frame.isnull().sum()
        to_remove = [col for col, count in null_counts.items()
                     if count >= remove_threshold]
        to_impute = [col for col, count in null_counts.items()
                     if 1 <= count < remove_threshold]
        return to_impute, to_remove

    def NuLL_Detector(self, remove_threshold=500):
        """Print, per dataset, the columns to impute and the columns to drop.

        Args:
            remove_threshold: A column with at least this many missing values
                is listed for removal; a column with fewer (but at least one)
                is listed for imputation.

        A dataset that could not be loaded is reported and skipped instead of
        failing with an ``AttributeError``.
        """
        if self.train_df is None or self.test_df is None:
            self.read_data()

        for label, frame in (("Train", self.train_df), ("Test", self.test_df)):
            if frame is None:
                print(f"{label}: Veri yüklenemedi, eksik değer analizi atlandı.")
                continue
            to_impute, to_remove = self._split_by_null_count(frame, remove_threshold)
            print(f"{label}: İmpute edilecek sütunlar: {to_impute}")
            print(f"{label}: Kaldırılacak sütunlar: {to_remove}")

class CatColumn(DataProcessor):
    """Expose the object-dtype part of the CSV-backed DataFrame."""

    def __init__(self, path):
        super().__init__(path)

    def get_cat_columns(self):
        """Return a DataFrame holding only the ``object``-dtype columns.

        The data is loaded on first use. On any failure the error is printed
        and ``None`` is returned.
        """
        try:
            if self.df is None:
                self.read_data()
            # Keep only the columns whose dtype is ``object``.
            return self.df.select_dtypes(include=['object'])
        except Exception as e:
            print(f"Veri Kategorik İşleme Giremedi! Hata: {e}")

    
class CorrGraph(DataProcessor):
    """Plot how strongly every numeric column correlates with one column."""

    def __init__(self, path):
        super().__init__(path)

    def corr(self, column):
        """Show a horizontal bar chart of correlations with ``column``.

        Args:
            column: Name of a numeric column; the bars show the correlation
                of each numeric column with it, strongest positive first.

        Errors (including an unknown or non-numeric ``column``) are printed,
        not raised.
        """
        try:
            if self.df is None:
                self.read_data()

            # 'number' is the documented selector for all numeric dtypes;
            # listing float/int selects only the dtypes pandas maps to those
            # two Python types.
            numeric_columns = self.df.select_dtypes(include='number')
            corr = numeric_columns.corr()
            corr_sale = corr[column].sort_values(ascending=False)

            plt.figure(figsize=(18, 14))

            plt.barh(corr_sale.index, corr_sale.values)
            plt.xlabel("Correlation", size=12)
            plt.ylabel("")
            plt.title(f"Relationship of the variables with  {column}", fontdict={'fontsize': 11, 'fontweight': 'bold'})
            # barh draws the first entry at the bottom; flip so the strongest
            # positive correlation is at the top.
            plt.gca().invert_yaxis()

            plt.show()

        except Exception as exp:
            print(f"Korelasyon Grafiği Oluşturulamadı: {exp}")


class TTSplit(DataProcessor):
    """Split the CSV-backed DataFrame into train and test partitions."""

    def __init__(self, train_path):
        super().__init__(train_path)

    def get_train_test(self, dependant, independent):
        """Return ``(X_train, X_test, y_train, y_test)`` with a 75/25 split.

        Args:
            dependant: Column removed from the feature matrix X.
            independent: Column (or list of columns) used as the target y.

        The target column(s) are always removed from X as well. Previously
        only ``dependant`` was dropped, so passing two different names left
        the target inside the features. Passing the same name for both
        arguments behaves exactly as before.

        Returns:
            The four splits as a tuple, or ``None`` after printing the error
            when the split could not be made.
        """
        try:
            if self.df is None:
                self.read_data()

            target_columns = independent if isinstance(independent, list) else [independent]
            # dict.fromkeys removes duplicates while keeping order, so the
            # same name passed twice is only dropped once.
            excluded = list(dict.fromkeys([dependant, *target_columns]))

            X = self.df.drop(excluded, axis=1)
            y = self.df[independent]

            X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)

            return X_train, X_test, y_train, y_test

        except Exception as exp:
            print(f"Train seti ayrılamadı: {exp}")

            
class MLModels:
    """Train, evaluate and query a fixed set of regressors on one CSV file.

    Intended order of use: ``preprocess_data`` first, then ``train_model`` or
    ``full_train``, then ``evaluate_model`` / ``predict``. Every method
    prints its errors instead of raising them.
    """

    def __init__(self, path):
        """Store the CSV path and instantiate one untrained model per name."""
        self.path = path
        self.df = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

        self.models = {
            'LinearRegression': LinearRegression(),
            'RidgeRegression': Ridge(),
            'LassoRegression': Lasso(),
            'ElasticNet': ElasticNet(),
            'CatBoost': CatBoostRegressor(),
            'KNeighborsRegressor': KNeighborsRegressor(),
            'RandomForestRegressor': RandomForestRegressor(),
            'GradientBoostingRegressor': GradientBoostingRegressor(),
            'XGradientBoosting':XGBRegressor()
        }

        # Fitted encoders per categorical column; filled by preprocess_data
        # and reused by predict.
        self.label_encoders = {}
        self.imputer = None

    def load_data(self):
        """Read the CSV at ``self.path`` into ``self.df``."""
        try:
            self.df = pd.read_csv(self.path)
        except Exception as e:
            print(f"Veri yüklenemedi: {e}")

    def preprocess_data(self, target_column, test_size=0.25, random_state=42):
        """Encode, impute and split the data into train/test attributes.

        Args:
            target_column: Name of the column to predict.
            test_size: Fraction of rows held out for testing.
            random_state: Seed passed to ``train_test_split``.
        """
        try:
            if self.df is None:
                self.load_data()

            X = self.df.drop([target_column], axis=1)
            y = self.df[target_column]

            # Label-encode the object columns. Missing entries are left as
            # NaN so the encoder never sees mixed str/NaN input and the
            # imputer below can fill them.
            encoders = {}
            for col in X.select_dtypes(include=['object']).columns:
                present = X[col].notna().to_numpy()
                codes = np.full(len(X), np.nan)
                if present.any():
                    le = LabelEncoder()
                    codes[present] = le.fit_transform(X[col].to_numpy()[present])
                    encoders[col] = le
                X[col] = codes
            self.label_encoders = encoders

            # Fill the remaining gaps with KNNImputer.
            # NOTE(review): the imputer is fitted on all rows before the
            # split, so test rows influence the imputed training values.
            self.imputer = KNNImputer(n_neighbors=5, metric='nan_euclidean')
            X = pd.DataFrame(self.imputer.fit_transform(X), columns=X.columns, index=X.index)

            # Split into train and test sets.
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
                X, y, test_size=test_size, random_state=random_state)

        except Exception as e:
            print(f"Veri işlenirken bir hata oluştu: {e}")

    def train_model(self, model_name):
        """Fit the model registered under ``model_name`` on the train split."""
        try:
            if model_name not in self.models:
                raise ValueError(f"{model_name} modeli tanımlı değil.")

            self.models[model_name].fit(self.X_train, self.y_train)
            print(f"{model_name} Modeli Eğitildi!")

        except Exception as e:
            print(f"{model_name} Modeli Eğitilemedi: {e}")

    def full_train(self):
        """Fit every registered model and print its test MSE and R^2."""
        for model_name, model in self.models.items():
            # Handle failures per model so one failing estimator does not
            # prevent the remaining ones from being trained.
            try:
                model.fit(self.X_train, self.y_train)

                y_pred = model.predict(self.X_test)
                mse = mean_squared_error(self.y_test, y_pred)
                r2 = r2_score(self.y_test, y_pred)
                print(f"{model_name} Modeli Ortalama Kare Hatası: {mse}")
                print(f"{model_name} Modeli R^2 Skoru: {r2}")

            except Exception as e:
                print(f"Model eğitim ve değerlendirme sırasında hata oluştu: {e}")

    def evaluate_model(self, model_name):
        """Print R^2 and RMSLE of an already trained model on the test split."""
        try:
            if model_name not in self.models:
                raise ValueError(f"{model_name} modeli tanımlı değil.")

            y_pred = self.models[model_name].predict(self.X_test)

            r2 = r2_score(self.y_test, y_pred)
            print(f"{model_name} Modeli R^2 Skoru: {r2}")
            # Root mean squared error between the logs of target and
            # prediction; both must be strictly positive for np.log.
            rmsle = np.sqrt(mean_squared_error(np.log(self.y_test), np.log(y_pred)))
            print(f"{model_name} Modeli Logaritmik Kök Ortalama Kare Hatası (RMSLE): {rmsle}")

        except Exception as e:
            print(f"{model_name} Modeli Değerlendirilemedi: {e}")

    def predict(self, model_name, data):
        """Predict the target for one observation.

        Args:
            model_name: Key of a trained model in ``self.models``.
            data: Mapping of column name to scalar value for a single row.
                It is not modified.

        Returns:
            The model's prediction, or ``None`` after printing the error.
        """
        try:
            if model_name not in self.models:
                raise ValueError(f"{model_name} modeli tanımlı değil.")

            # Work on a copy so the caller's mapping keeps its raw values.
            row = dict(data)
            for col, le in self.label_encoders.items():
                # Missing categorical values stay NaN for the imputer.
                if col in row and not pd.isna(row[col]):
                    row[col] = le.transform([row[col]])[0]

            frame = pd.DataFrame(row, index=[0])  # single-row frame

            # Use the training column layout: the order no longer depends on
            # the mapping's key order, and absent features become NaN.
            if self.X_train is not None:
                frame = frame.reindex(columns=self.X_train.columns)

            # Fill missing values with the imputer fitted in preprocess_data.
            frame = pd.DataFrame(self.imputer.transform(frame), columns=frame.columns)

            prediction = self.models[model_name].predict(frame)
            return prediction

        except Exception as e:
            print(f"Tahmin yapılamadı: {e}")
      