In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd 
import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)


In [None]:
# Read the diabetes CSV from the Kaggle input directory into a DataFrame.
DATA_PATH = "/kaggle/input/pima-indians-diabetes-database/diabetes.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Summary statistics for the columns of the frame.
df.describe()

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Per column: True if at least one value is missing.
df.isna().any()

In [None]:
# Per column: True if at least one value is present.
df.notna().any()

In [None]:
# Per column: True only if every value is missing.
df.isna().all()

In [None]:
# Per column: True only if no value is missing.
df.notna().all()

In [None]:
# Per column: number of non-missing values.
df.notna().sum()

In [None]:
# Flag each row as overweight: 1 when BMI is above 25, otherwise 0.
# The comparison is vectorized, so pandas evaluates the whole column at once
# instead of looping over it in Python; the result is the same 0/1 integers.
df["Overweight"] = (df["BMI"] > 25).astype(int)
df.head()

## Relational Plots with Matplotlib

* Scatter plot: iki değişken arasındaki ilişkinin dağılımını veri noktalarıyla gösterir.
* Lineplot: İki değişken arasındaki ilişkiyi sürekli gösterir. Veri noktaları birbirine çizgilerle bağlıdır. (Zaman serilerinde kullanılır) 

* s parametresi: marker boyutu
* c parametresi: marker rengi, hangi değişkeni tuttuğu da yazılabilir
* alpha: marker opaklığı

In [None]:
# Set the default Matplotlib font size to 25.
plt.rcParams['font.size'] = 25

In [None]:
# Switch seaborn to its "paper" plotting context.
# NOTE(review): set_context also rewrites font-related rcParams, so it
# presumably overrides a font.size set earlier via plt.rcParams — confirm.
sns.set_context("paper")

In [None]:
# Age vs. insulin, with each marker coloured by the Overweight flag.
plt.scatter(
    x=df["Age"],
    y=df["Insulin"],
    s=389,               # marker size
    c=df["Overweight"],  # column that drives the marker colour
    cmap="viridis",      # colour palette
    alpha=0.2,           # marker opacity
)
plt.colorbar()  # side scale showing which colour maps to which value
plt.title("Relationship between Age and Insulin")  # plot title
plt.xlabel("Age")  # axis labels
plt.ylabel("Insulin")
plt.show()

![scatter](http://chartio.com/assets/5689fd/tutorials/charts/scatter-plots/a9b8dd5dc2057a70446e5aa32f32b49d54b55f5cabf17a4610e2da94bea7fed5/scatter-plot-example-2.png)

## Scatter plot with Subplots
fig, ax = plt.subplots(): figure ve axes object oluşturur. figure’de her şey var, axes data’yı tutuyor.

In [None]:
# Create an empty figure with two axes laid out side by side.
fig, ax = plt.subplots(nrows=1, ncols=2)
plt.show()

In [None]:
# Same age/insulin scatter, drawn through the object-oriented Axes interface.
fig, ax = plt.subplots()
ax.scatter(df["Age"], df["Insulin"], c=df["Overweight"], cmap="viridis")
ax.set(
    xlabel="Age",
    ylabel="Insulin",
    title="Relationship between Age and Insulin",
)
plt.show()

Histogram: Numerik ya da kategorik verilerde dağılımı yorumlamamıza yardımcı olur.

In [None]:
# Distribution of Age across ten equal-width bins.
fig, ax = plt.subplots()
ax.hist(df["Age"], bins=10, label="Age")
ax.set(xlabel="Age", ylabel="Number of Observations")
plt.show()

In [None]:
# Distribution of Age using explicit bin edges: 20, 30, ..., 80.
bins = list(range(20, 81, 10))
fig, ax = plt.subplots()
ax.hist(df["Age"], bins=bins, label="Age Bins")
ax.set(xlabel="Age", ylabel="Number of Observations")
plt.show()

Bar plot: Kategorik verinin özelliklerine bakmamızı sağlar. 

In [None]:
# Mean insulin for each outcome class.
# ax.bar draws one bar per row it is given, so passing the raw columns piles
# every observation on top of the others at each Outcome value and only the
# tallest bar stays visible. Aggregate first so each class gets one bar.
insulin_by_outcome = df.groupby("Outcome")["Insulin"].mean()

fig, ax = plt.subplots()
# String labels make the x axis categorical, giving one tick per class.
ax.bar(insulin_by_outcome.index.astype(str), insulin_by_outcome.values)
ax.set_xlabel("Outcome")
ax.set_ylabel("Mean Insulin")
plt.show()

## Kaydetme

In [None]:
# Bar chart of insulin against age, saved to disk as a PNG.
fig, ax = plt.subplots()
# NOTE(review): one bar is drawn per row, so rows sharing an age overlap and
# only the tallest bar is visible — aggregate per age if a summary is intended.
ax.bar(df.Age, df.Insulin)
# Rotate the tick labels Matplotlib already computed. Calling
# set_xticklabels(df.Age, ...) would replace them with the first rows of the
# Age column, which do not correspond to the tick positions.
ax.tick_params(axis="x", labelrotation=45)
fig.savefig("Age.png")

* `fig.savefig("Age.png")`: kayıp olmadan kaydeder, yüksek kalitelidir ama çok hafıza tutar
* `fig.savefig("Age.jpg", pil_kwargs={"quality": 50})`: websitesine konulabilir (eski `quality=50` parametresi güncel Matplotlib sürümlerinde kaldırıldı)
* `fig.savefig("Age.png", dpi=200)`: dots per inch, dense rendering
* `fig.set_size_inches([5, 3])`: aspect ratio

## Seaborn

* FacetGrid (relplot(), catplot()) subplotlar oluşturabilir
* AxesSubplot(scatterplot, countplot) bir tane plot oluşturur

## Count Plot

In [None]:
# Number of observations per age, using the "RdBu" palette.
sns.set_palette("RdBu")
sns.countplot(data=df, x="Age")
plt.show()

In [None]:
# Figure-level version of the age count plot, three times wider than tall.
sns.catplot(
    data=df,
    x="Age",
    kind="count",
    aspect=3,
)
plt.show()

In [None]:
# Age count plot with a figure-level title.
g = sns.catplot(
    data=df,
    x="Age",
    kind="count",
    aspect=3,
)
# y above 1 lifts the title so it sits above the axes.
g.fig.suptitle("Age Counts", y=1.04)
plt.show()

In [None]:
# Age count plot with the x tick labels tilted by 30 degrees.
g = sns.catplot(
    data=df,
    x="Age",
    kind="count",
    aspect=3,
)
plt.xticks(rotation=30)
plt.show()

## Scatter plot

In [None]:
# Axes-level scatter of insulin against age, coloured by Outcome.
sns.scatterplot(
    data=df,
    x="Age",
    y="Insulin",
    hue="Outcome",
)
plt.show()

In [None]:
# Figure-level scatter of insulin against age, coloured by Outcome.
sns.relplot(
    data=df,
    x="Age",
    y="Insulin",
    hue="Outcome",
    kind="scatter",
)
plt.show()

## Line plot

In [None]:
# Insulin against glucose as a line; ci="sd" requests a standard-deviation band.
# NOTE(review): `ci` is deprecated in newer seaborn releases in favour of
# `errorbar` — confirm the target seaborn version before changing it.
sns.relplot(
    data=df,
    x="Glucose",
    y="Insulin",
    kind="line",
    ci="sd",
    markers=True,
    dashes=True,
)
plt.show()

In [None]:
# Wide line plot of insulin against age with a standard-deviation band.
sns.relplot(
    data=df,
    x="Age",
    y="Insulin",
    kind="line",
    ci="sd",
    aspect=4,
)
plt.show()

In [None]:
# Same wide line plot with the uncertainty band switched off.
sns.relplot(
    data=df,
    x="Age",
    y="Insulin",
    kind="line",
    ci=None,
    aspect=4,
)
plt.show()

## Scatter Subplots

In [None]:
# Glucose against insulin, one subplot row per Outcome value.
sns.relplot(
    data=df,
    x="Insulin",
    y="Glucose",
    row="Outcome",
    kind="scatter",
)
plt.show()

In [None]:
# Glucose against insulin on a grid: columns by Outcome, rows by Overweight.
sns.relplot(
    data=df,
    x="Insulin",
    y="Glucose",
    row="Overweight",
    col="Outcome",
    kind="scatter",
)
plt.show()

## Heatmap

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
sns.set_palette("RdBu")
correlation = df.corr()
# set_palette does not colour a heatmap; heatmap takes its colours from
# `cmap`, so the palette is passed explicitly. Fixing the range to [-1, 1]
# keeps zero correlation at the midpoint of the diverging colour map.
sns.heatmap(correlation, annot=True, cmap="RdBu", vmin=-1, vmax=1)
plt.show()

## Categorical Plots

In [None]:
# Bar plot of insulin for each outcome class.
sns.catplot(
    data=df,
    x="Outcome",
    y="Insulin",
    kind="bar",
)
plt.show()

![boxplot](https://miro.medium.com/max/18000/1*2c21SkzJMf3frPXPAR_gZA.png)

In [None]:
# Box plot of age for each outcome class.
sns.catplot(
    data=df,
    x="Outcome",
    y="Age",
    kind="box",
)
plt.show()

In [None]:
# Number of observations in each outcome class.
sns.catplot(
    data=df,
    x="Outcome",
    kind="count",
)
plt.show()

## Presets

In [None]:
# Redraw the outcome count plot with the "dark" style preset.
# Style options listed here: white, dark, whitegrid, darkgrid, ticks
sns.set_style("dark")
sns.catplot(
    data=df,
    x="Outcome",
    kind="count",
)
plt.show()

In [None]:
# Redraw the outcome count plot with the "notebook" context and a grey palette.
# Palettes noted for categorical plots: "RdBu", "PRGn", "RdBu_r", "PRGn_r"
# Palettes noted for continuous plots: Greys, Blues, PuRd, GnBu
sns.set_context("notebook")
sns.set_palette("Greys_r")
sns.catplot(
    data=df,
    x="Outcome",
    kind="count",
)
plt.show()

In [None]:
# Switch back to the "paper" context.
sns.set_context("paper")
# NOTE(review): no figure is created in this cell, so this call appears to
# have nothing new to display — confirm whether a plot was meant to precede it.
plt.show()
# Context options listed here: paper, poster, talk