In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from sklearn.linear_model import LinearRegression, LogisticRegression
import statsmodels.api as sm

In [None]:
# Load the dataset from the notebook's working directory into a DataFrame.
df = pd.read_csv('diabetes.csv')

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
# describe is a method: without the call parentheses the cell only shows the
# bound-method repr instead of the statistics table.
df.describe()

## Univariate analysis: Frequency, Mean, Median, Mode, Variance, Standard Deviation, Skewness and Kurtosis

In [None]:
# Print descriptive statistics for every column of df, one column at a time.
for column in df:  # iterating a DataFrame yields its column labels
    series = df[column]
    print(f"Column: {column}")
    print(f"Frequency:\n{series.value_counts()}\n")
    # Central tendency
    print(f"Mean: {series.mean()}")
    print(f"Median: {series.median()}")
    print(f"Mode:\n{series.mode()}")
    # Spread
    print(f"Variance: {series.var()}")
    print(f"Standard Deviation:{series.std()}")
    # Shape of the distribution
    print(f"Skewness:{series.skew()}")
    print(f"Kurtosis: {series.kurt()}")
    print(" \n")


# Bivariate analysis: Linear and logistic regression modeling

In [None]:
# Predictor columns and target column used by the linear regression cells.
X_linear = df[[
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]]
y_linear = df['Outcome']

In [None]:
# Fit a least-squares linear model of Outcome on the selected predictors.
# NOTE(review): if Outcome is a binary label, the fitted values are not
# bounded to [0, 1] — confirm that a linear fit is intended here.
model_linear = LinearRegression()
model_linear.fit(X_linear, y_linear)

In [None]:
# Report each fitted coefficient next to the name of its predictor column.
print('Linear Regression Coefficients:')
for idx, feature in enumerate(X_linear.columns):
    coef = model_linear.coef_[idx]
    print(f'{feature}: {coef}')

In [None]:
# In-sample predictions: the model is evaluated on the same rows it was fitted on.
predictions_linear = model_linear.predict(X_linear)

In [None]:
# Predictor columns and target column used by the logistic regression cells.
X_logistic = df[[
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]]
y_logistic = df['Outcome']


In [None]:
# Fit a logistic regression of Outcome on the selected predictors.
# max_iter is raised above scikit-learn's default of 100: the predictors are
# passed in unscaled, and the default lbfgs solver can stop with a
# ConvergenceWarning before the coefficients have settled, which would make
# the coefficients printed afterwards unreliable.
model_logistic = LogisticRegression(max_iter=1000)
model_logistic.fit(X_logistic, y_logistic)

In [None]:
# Report each fitted coefficient next to the name of its predictor column.
print('Logistic Regression Coefficients:')
class_coefs = model_logistic.coef_[0]  # first (index 0) entry of coef_ holds the per-feature weights
for idx, feature in enumerate(X_logistic.columns):
    coef = class_coefs[idx]
    print(f'{feature}: {coef}')

In [None]:
# In-sample predictions: the classifier is evaluated on the same rows it was fitted on.
predictions_logistic = model_logistic.predict(X_logistic)

# Multiple Regression Analysis

In [None]:
# Every column except Outcome is used as a predictor; Outcome is the response.
X = df.drop(columns='Outcome')
y = df['Outcome']


In [None]:
# Add a constant column to the design matrix so the OLS fit below includes an intercept term.
X = sm.add_constant(X)


In [None]:
# Ordinary least squares with y as the response (endog) and X as the design matrix (exog).
model = sm.OLS(endog=y, exog=X)
results = model.fit()


In [None]:
# Print the regression summary table for the fitted OLS model.
print(results.summary())


In [None]:
# Pairwise correlation matrix of the columns of df (Pearson is the pandas default).
corr = df.corr(method='pearson')

In [None]:
# Heatmap of the correlation matrix with one tick and one label per column.
fig, ax = plt.subplots()
# Fix the colour scale to the full correlation range so colours are comparable across runs.
cax = ax.matshow(corr, vmin=-1, vmax=1)
fig.colorbar(cax)
# Derive tick positions and labels from corr itself rather than hard-coding 9
# ticks and reading labels from df, so ticks and labels always match the
# matrix that is actually plotted.
names = corr.columns
ticks = np.arange(len(names))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names, rotation=90)  # vertical labels avoid overlap along the x axis
ax.set_yticklabels(names)
plt.show()

In [None]:
# Pairwise scatter plots of every column of df.
# The figure size is passed to scatter_matrix directly instead of being written
# to plt.rcParams: changing rcParams is global to the kernel session and would
# silently resize every other figure when cells are re-run.
scatter_matrix(df, figsize=(20, 20))
plt.show()