<a href="https://colab.research.google.com/github/isosm/MLIO/blob/main/Copy_of_Mall_f%C3%B6r_ML_IO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. Importera nödvändiga bibliotek**


In [None]:
# Standardbibliotek och inställningar
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.impute import SimpleImputer

# Visualisation settings
# `set_theme` is seaborn's preferred entry point; `sns.set` is only an alias for it
# that the seaborn docs say may be removed in the future.
sns.set_theme(style="whitegrid")
# Default size (width, height) in inches for every figure created below.
plt.rcParams['figure.figsize'] = (10, 6)


**2. Definiera funktioner för datahantering och analys**


In [None]:
# Location of the input CSV file. This is a placeholder: replace it with the
# real path before running the notebook.
filepath = 'path_to_iris.csv'

# Read the whole CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=filepath)



In [None]:
# Exploratory data analysis: print a few textual summaries, then plot.
print("### EDA ###")

# Each entry pairs the printed heading with the summary shown under it:
# the first five rows, descriptive statistics, and per-column missing-value counts.
eda_summaries = (
    ("Första 5 raderna av data:\n", data.head()),
    ("\nBeskrivande statistik:\n", data.describe()),
    ("\nSaknade värden:\n", data.isnull().sum()),
)
for heading, summary in eda_summaries:
    print(heading, summary)

# Pairwise scatter plots of the columns, coloured by the 'target' column.
sns.pairplot(data, hue='target')
plt.show()


**4. Förbered data**


In [None]:
# Name of the label column; every other column is treated as a numeric feature.
target = 'target'

# Select the features by name rather than by position, so the list agrees with
# the `data.drop(columns=[target])` split even if the label is not the last column.
numerical_features = [column for column in data.columns if column != target]
categorical_features = None  # No categorical features in this dataset

# Mean imputation for missing numeric values.
# The imputer is deliberately NOT fitted on the full dataset here: doing so
# before the train/test split would leak test-set statistics into training and
# would overwrite `data` in place. It is instead the first step of the numeric
# sub-pipeline, so it is fitted only on whatever data the pipeline is fitted on.
imputer = SimpleImputer(strategy='mean')

# Numeric branch: impute missing values, then standardise.
numerical_transformer = Pipeline(steps=[
    ('imputer', imputer),
    ('scaler', StandardScaler()),
])

# Assemble the ColumnTransformer; the categorical branch is added only when
# categorical feature names have been provided.
transformers = [('num', numerical_transformer, numerical_features)]
if categorical_features:
    categorical_transformer = OneHotEncoder(handle_unknown='ignore')
    transformers.append(('cat', categorical_transformer, categorical_features))

preprocessor = ColumnTransformer(transformers=transformers)


**5. Dela upp data**


In [None]:
# Feature matrix: every column except the label column named by `target`.
X = data.drop(target, axis=1)
# Label vector.
y = data[target]

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


**6. Definiera modeller och parametergrider för GridSearchCV**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

# Candidate models and their hyperparameter grids for GridSearchCV.
# Each dict replaces the pipeline's 'classifier' step with a different estimator
# and lists the values to try for that estimator's parameters.
# The randomised tree ensembles are seeded with the same value as the
# train/test split so that the search gives the same result on every run.
param_grid = [
    {
        'classifier': [LogisticRegression()],
        'classifier__C': [0.1, 1, 10, 100]
    },
    {
        'classifier': [RandomForestClassifier(random_state=42)],
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [None, 10, 20, 30]
    },
    {
        'classifier': [SVC()],
        'classifier__C': [0.1, 1, 10],
        'classifier__gamma': [0.001, 0.01, 0.1]
    },
    {
        'classifier': [GradientBoostingClassifier(random_state=42)],
        'classifier__n_estimators': [100, 200],
        'classifier__learning_rate': [0.01, 0.1, 0.2],
        'classifier__max_depth': [3, 5, 7]
    }
]


**7. Bygg pipeline**

In [None]:
# Two-step pipeline: preprocessing followed by a classifier.
# The LogisticRegression here is only a placeholder; the grid search replaces
# the 'classifier' step with each candidate model from `param_grid`.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
]
pipeline = Pipeline(steps=pipeline_steps)


**8. Kör GridSearchCV**

In [None]:
# Search over every model/parameter combination in `param_grid` using 5-fold
# cross-validation on the training data; n_jobs=-1 uses all available cores.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)



**9. Bästa modell och utvärdering**

In [None]:
# Take the pipeline refitted with the best parameter combination found by the search.
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Evaluate the best model on the held-out test set.
y_pred = best_model.predict(X_test)

# Each entry pairs the printed label with the metric computed on (y_test, y_pred).
evaluation_metrics = (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
)
for label, metric in evaluation_metrics:
    print(label, metric(y_test, y_pred))
