# Machine Learning

### Importing important modules and libraries

In [90]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

### Loading dataset

In [91]:
# Load scikit-learn's bundled diabetes dataset. Per the description printed
# further down, the target is a quantitative measure of disease progression,
# i.e. a regression target rather than a set of class labels.
data = load_diabetes()

### Checking the data attributes

In [92]:
dir(data)

['DESCR',
 'data',
 'data_filename',
 'data_module',
 'feature_names',
 'frame',
 'target',
 'target_filename']

### Data Description

In [93]:
print(data.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

### Getting the features names

In [94]:
data.feature_names

### Getting the feature values of the first sample

In [95]:
data.data[0]

### Getting the first target value

In [96]:
data.target[0]

In [97]:
data.target

### Creating Pandas Dataframe

In [98]:
# Build a DataFrame from the feature matrix, labelling the columns with the
# dataset's feature names.
df = pd.DataFrame(data=data.data, columns=data.feature_names)

### Checking the first 50 rows

In [99]:
df.head(50)

### Checking the first 5 rows

In [100]:
df.head()

### Appending data target to the Dataframe

In [101]:
# Add the response values as a `target` column. This mutates `df` in place;
# re-running the cell simply overwrites the same column.
df['target'] = data.target

### Checking the first 5 rows

In [102]:
df.head()

### Shape of data

In [103]:
df.shape

### Data Description

In [104]:
df.describe()

### Data Information

In [105]:
df.info()

### Data Correlation

In [106]:
df.corr()

### Checking correlations with the bmi and bp columns, sorted from highest to lowest

In [107]:
df.corr()['bmi'].sort_values(ascending=False)

In [108]:
df.corr()['bp'].sort_values(ascending=False)

### Splitting data into X and y variables

In [109]:
# Feature matrix: every column except the response.
X = df.drop(columns=['target'])
# Response vector: the `target` column on its own.
y = df.loc[:, 'target']

### Checking X and y 

In [110]:
X.head()

In [111]:
y.head()

### Splitting data into training and testing sets

In [112]:
# Hold out 25% of the rows for testing. train_test_split shuffles before
# splitting, so a fixed random_state is needed for the same split to be
# produced on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

### Checking the sizes of the training and testing sets

In [113]:
len(X_train)

In [114]:
len(X_test)

# MACHINE LEARNING

In [115]:
# The target is a quantitative disease-progression measure (see the dataset
# description), not a class label, so stratifying folds on it is not
# meaningful. Use a plain K-fold instead, shuffled with a fixed seed so the
# folds are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [116]:
kf

In [117]:
# The diabetes target is quantitative (see the dataset description), so
# cross-validate a regressor rather than a classifier: LogisticRegression would
# treat each distinct target value as its own class.
# `scoring="r2"` makes the reported metric explicit: one R^2 value per fold.
lr = cross_val_score(LinearRegression(), X, y, cv=kf, scoring="r2")
lr