# Logistic Regression with Python

# Logisztikus regresszió / Bevezetés az osztályozásba



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Load the Titanic training CSV into X and preview its first rows.
X = pd.read_csv('files/titanic_train.csv')
X.head()

FileNotFoundError: File b'titanic_train.csv' does not exist

# Exploratory Data Analysis II.

# Feltáró jellegű adatelemzés II


## Missing Data

## Hiányzó adatok kezelése 


We can use seaborn to create a simple heatmap to see where we are missing data!

Elsőként nézzük meg, hol vannak hiányzó adatok 

In [None]:
# Draw the boolean null-mask of X as a heatmap so missing entries stand out;
# row tick labels and the colour bar are switched off.
sns.heatmap(
    X.isnull(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

# Countplot   / Diszkrét eloszlás ábrázolása

In [None]:
# Bar chart of how many rows fall into each Survived value.
sns.set_style('whitegrid')
sns.countplot(data=X, x='Survived', palette='RdBu_r')

In [None]:
# Bar chart of how many rows fall into each SibSp value.
sns.countplot(data=X, x='SibSp')

In [None]:
# Survived counts again, this time with one bar per Sex inside each group.
sns.set_style('whitegrid')
sns.countplot(data=X, x='Survived', hue='Sex', palette='RdBu_r')

In [None]:
# Survived counts with one bar per Pclass inside each group.
sns.set_style('whitegrid')
sns.countplot(data=X, x='Survived', hue='Pclass', palette='rainbow')

## Distplot  - Folytonos változó eloszlásának vizualizációja 

In [None]:
# Histogram of the known ages (missing values are dropped first).
# histplot is used because distplot is deprecated in seaborn; kde=False keeps
# this a plain count histogram with 30 bins, as before.
sns.histplot(X['Age'].dropna(), kde=False, color='darkred', bins=30)

## Box plot - dobozdiagram 

In [None]:
# Box plot of Age for each Pclass value, drawn on a 12x7 inch figure.
plt.figure(figsize=(12, 7))
sns.boxplot(data=X, x='Pclass', y='Age')

## Filling missing data by function  / hiányzó adat pótlása függvénnyel


In [None]:
def impute_age(cols):
    """Return a passenger's age, substituting a per-class default when it is missing.

    Parameters
    ----------
    cols : sequence
        The age followed by the passenger class, in that order -- for example
        one row of ``X[['Age', 'Pclass']]`` as handed over by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    The age unchanged when it is not null; otherwise 37 for class 1,
    29 for class 2 and 24 for any other class value.
    """
    # Unpack by iteration instead of ``cols[0]`` / ``cols[1]``: on a row Series
    # whose index holds column labels, integer keys passed to ``[]`` are
    # treated as labels by newer pandas, so positional access warns or fails.
    age, pclass, *_ = cols

    if not pd.isnull(age):
        return age

    # NOTE(review): these defaults are presumably typical ages per class read
    # off the Age-by-Pclass box plot -- confirm against the data.
    fallback_age_by_class = {1: 37, 2: 29}
    return fallback_age_by_class.get(pclass, 24)

In [None]:
# Replace the Age column with the result of running impute_age on every
# (Age, Pclass) row.
X['Age'] = X[['Age', 'Pclass']].apply(
    impute_age,
    axis='columns',
)

In [None]:
# Redraw the null-mask heatmap to check what is still missing after the Age
# column has been filled in.
sns.heatmap(
    X.isnull(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

Great! Let's go ahead and drop the Cabin column and the row in Embarked that is NaN.

In [None]:
# Remove the Cabin column. X is rebound (no inplace=True) and errors='ignore'
# is passed so that re-running this cell after Cabin is already gone does not
# raise a KeyError.
X = X.drop(columns=['Cabin'], errors='ignore')

In [None]:
# Preview the first rows of X.
X.head()

In [None]:
# Remove, in place, every row of X that still contains a missing value.
X.dropna(axis=0, how='any', inplace=True)

## Converting Categorical Features 

## Kategória változók konvertálása

We'll need to convert categorical features to dummy variables using pandas! Otherwise our machine learning algorithm won't be able to directly take in those features as inputs.

Dummy változókká kell alakítani a változóinkat

In [None]:
# Print the summary of X (columns, non-null counts, dtypes).
X.info()

In [None]:
# One-hot encode Sex and Embarked; drop_first=True leaves out the first
# category of each so the remaining dummy columns are not redundant.
sex, embark = (
    pd.get_dummies(X[column], drop_first=True)
    for column in ('Sex', 'Embarked')
)
sex.head()

In [None]:
# Remove the original Sex and Embarked columns (already encoded into the
# `sex` and `embark` dummy frames) together with Name and Ticket.
# X is rebound (no inplace=True) and errors='ignore' is passed so that
# re-running this cell after the columns are gone does not raise a KeyError.
X = X.drop(columns=['Sex', 'Embarked', 'Name', 'Ticket'], errors='ignore')
X.head()

In [None]:
# Put the remaining columns of X side by side with the two dummy frames.
train = pd.concat(objs=[X, sex, embark], axis='columns')

In [None]:
# Preview the first rows of the combined train frame.
train.head()

# Building a Logistic Regression model

# Modell építése 

Let's start by splitting our data into a training set and test set (there is another test.csv file that you can play around with in case you want to use all this data for training).

## Train-Test Split / Tanítási és ellenőrzési minta felosztása

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing. Survived is the target; every other
# column of `train` is a feature. random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    train.drop('Survived', axis='columns'),
    train['Survived'],
    random_state=101,
    test_size=0.30,
)

## Training and Predicting

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit the classifier on the training split.
# max_iter is raised above scikit-learn's default of 100: the features are
# used unscaled here, which can leave the solver short of convergence within
# the default iteration budget.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(X_train, y_train)

In [None]:
# Predict a class label for every row of the held-out test split and show them.
predictions = logmodel.predict(X=X_test)
predictions

## Evaluation / Kiértékelés 

In [None]:
from sklearn.metrics import classification_report,confusion_matrix
# Confusion matrix of the true test labels against the model's predictions.
confusion_matrix(y_true=y_test, y_pred=predictions)

In [None]:
# Display the fitted estimator object.
logmodel

In [None]:
# Display the fitted model's coefficient array.
# NOTE(review): presumably ordered like the columns of X_train -- confirm.
logmodel.coef_

In [None]:
# Print the text classification report for the test-set predictions.
print(
    classification_report(y_true=y_test, y_pred=predictions)
)