In [1]:
import numpy as np
import pandas as pd

## Series

In [2]:
# A dict maps directly onto a Series: keys become index labels, values become the data.
my_dict = dict(Kevin=19, George=45, Steven=76)
pd.Series(data=my_dict)

Kevin     19
George    45
Steven    76
dtype: int64

In [3]:
# Two parallel lists: the labels, and the value that belongs to each label.
names = ["Kevin", "George", "Steven"]
ages = [19, 45, 76]

pd.Series(ages, index=names)

Kevin     19
George    45
Steven    76
dtype: int64

In [4]:
# A NumPy array works as the data too; the labels come from the existing `names` list.
np_arr = np.array([19, 45, 76])
pd.Series(data=np_arr, index=names)

Kevin     19
George    45
Steven    76
dtype: int64

In [5]:
# Two Series that share the same labels, so they can be combined label by label.
score_labels = ["K1", "K2", "K3"]
score1 = pd.Series([10, 2, 5], index=score_labels)
score2 = pd.Series([11, 6, 8], index=score_labels)

In [6]:
# Adding two Series pairs the values up by index label (K1 with K1, and so on).
total_score = score1 + score2
print(total_score, end="\n\n")  # the extra newline leaves a blank line before the next print
print(total_score["K2"])

K1    21
K2     8
K3    13
dtype: int64

8


## DataFrame

In [7]:
# Draw the 4x3 standard-normal sample from a seeded Generator so that this array,
# and the frames built from it further down, are identical on every Restart & Run All.
rng = np.random.default_rng(42)
data = rng.standard_normal((4, 3))
data

array([[ 0.22651008,  1.22751232, -0.0781857 ],
       [ 0.75999215,  0.64938266, -0.69551747],
       [-0.86739777, -1.89064463, -0.56809678],
       [ 0.04946857,  1.57808351, -0.54718039]])

In [8]:
# Wrap the raw array in a DataFrame; with no labels supplied, rows and columns are numbered from 0.
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,0.22651,1.227512,-0.078186
1,0.759992,0.649383,-0.695517
2,-0.867398,-1.890645,-0.568097
3,0.049469,1.578084,-0.54718


In [9]:
df[0] # selects the column labelled 0 and returns it as a Series

0    0.226510
1    0.759992
2   -0.867398
3    0.049469
Name: 0, dtype: float64

In [10]:
# Same array, this time with explicit row labels and column names.
row_labels = ["A0", "A1", "A2", "A3"]
column_names = ["salary", "age", "working_hours"]
new_df = pd.DataFrame(data, index=row_labels, columns=column_names)
new_df

Unnamed: 0,salary,age,working_hours
A0,0.22651,1.227512,-0.078186
A1,0.759992,0.649383,-0.695517
A2,-0.867398,-1.890645,-0.568097
A3,0.049469,1.578084,-0.54718


In [11]:
# Indexing with a list of column names returns a frame holding just those columns.
selected_columns = ["salary", "age"]
new_df[selected_columns]

Unnamed: 0,salary,age
A0,0.22651,1.227512
A1,0.759992,0.649383
A2,-0.867398,-1.890645
A3,0.049469,1.578084


In [12]:
# Label-based lookup: .loc selects the row whose index label is "A0".
new_df.loc["A0"]

salary           0.226510
age              1.227512
working_hours   -0.078186
Name: A0, dtype: float64

In [13]:
# Position-based lookup: .iloc selects by integer position (0 is the first row, here "A0").
new_df.iloc[0]

salary           0.226510
age              1.227512
working_hours   -0.078186
Name: A0, dtype: float64

In [14]:
# Derive a new column from an existing one; assigning to a label that does not exist yet adds it.
doubled_age = new_df["age"] * 2
new_df["retirement_age"] = doubled_age
new_df

Unnamed: 0,salary,age,working_hours,retirement_age
A0,0.22651,1.227512,-0.078186,2.455025
A1,0.759992,0.649383,-0.695517,1.298765
A2,-0.867398,-1.890645,-0.568097,-3.781289
A3,0.049469,1.578084,-0.54718,3.156167


In [15]:
new_df.drop("retirement_age", axis=1) # axis=1 -> column; the result is a new frame, new_df itself keeps the column

Unnamed: 0,salary,age,working_hours
A0,0.22651,1.227512,-0.078186
A1,0.759992,0.649383,-0.695517
A2,-0.867398,-1.890645,-0.568097
A3,0.049469,1.578084,-0.54718


In [16]:
new_df.drop("A3", axis=0) # axis=0 -> row; the result is a new frame, new_df itself keeps row A3

Unnamed: 0,salary,age,working_hours,retirement_age
A0,0.22651,1.227512,-0.078186,2.455025
A1,0.759992,0.649383,-0.695517,1.298765
A2,-0.867398,-1.890645,-0.568097,-3.781289


In [17]:
# The drops above were not applied to new_df: drop() returns a new frame and leaves
# the original untouched. To keep the change, rebind the name to the returned frame.
# This is preferred over new_df.drop("retirement_age", axis=1, inplace=True), which
# mutates new_df in place and returns None.
new_df = new_df.drop("retirement_age", axis=1)

In [18]:
# Re-display new_df to confirm that retirement_age has been removed from it.
new_df

Unnamed: 0,salary,age,working_hours
A0,0.22651,1.227512,-0.078186
A1,0.759992,0.649383,-0.695517
A2,-0.867398,-1.890645,-0.568097
A3,0.049469,1.578084,-0.54718


In [19]:
# Two ways to read one cell. Chained: fetch row "A0" as a Series, then index it by "age".
print(new_df.loc["A0"]["age"], end="\n\n")
# Single lookup: hand .loc the row label and the column label together.
print(new_df.loc["A0", "age"])

1.2275123215454535

1.2275123215454535


In [20]:
# Element-wise comparison: produces a frame of the same shape filled with booleans.
new_df < 0

Unnamed: 0,salary,age,working_hours
A0,False,False,True
A1,False,False,True
A2,True,True,True
A3,False,False,True


In [21]:
# Masking with a boolean frame keeps the shape; cells where the mask is False show up as NaN.
negative_mask = new_df < 0
new_df[negative_mask]

Unnamed: 0,salary,age,working_hours
A0,,,-0.078186
A1,,,-0.695517
A2,-0.867398,-1.890645,-0.568097
A3,,,-0.54718


In [22]:
# Masking with a boolean Series filters rows: only rows with a positive salary remain.
positive_salary = new_df["salary"] > 0
new_df[positive_salary]

Unnamed: 0,salary,age,working_hours
A0,0.22651,1.227512,-0.078186
A1,0.759992,0.649383,-0.695517
A3,0.049469,1.578084,-0.54718


In [23]:
# Move the current index labels into a regular column called "index" and number the rows from 0.
# The result is a new frame; new_df keeps its labels.
new_df.reset_index()

Unnamed: 0,index,salary,age,working_hours
0,A0,0.22651,1.227512,-0.078186
1,A1,0.759992,0.649383,-0.695517
2,A2,-0.867398,-1.890645,-0.568097
3,A3,0.049469,1.578084,-0.54718


In [24]:
# Attach the replacement labels as an ordinary column first; a later cell promotes it to the index.
new_index_list = [f"a{position}" for position in range(4)]
new_df["new_index"] = new_index_list

In [25]:
# Show the frame with the extra new_index column.
new_df

Unnamed: 0,salary,age,working_hours,new_index
A0,0.22651,1.227512,-0.078186,a0
A1,0.759992,0.649383,-0.695517,a1
A2,-0.867398,-1.890645,-0.568097,a2
A3,0.049469,1.578084,-0.54718,a3


In [26]:
# Promote the new_index column to be the row index. set_index returns a new frame,
# so the name is rebound; the previous A0..A3 labels are discarded.
new_df = new_df.set_index("new_index")

In [27]:
# The a0..a3 labels now form the index (named new_index).
new_df

Unnamed: 0_level_0,salary,age,working_hours
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a0,0.22651,1.227512,-0.078186
a1,0.759992,0.649383,-0.695517
a2,-0.867398,-1.890645,-0.568097
a3,0.049469,1.578084,-0.54718


In [28]:
# Outer level repeats each group label three times; inner level names the members.
dıs_index = ["A"] * 3 + ["B"] * 3
ic_index = ["a1", "a2", "a3", "b1", "b2", "b3"]

# Pair the two levels position by position into (outer, inner) tuples.
birlesmis_index = [(dis, ic) for dis, ic in zip(dıs_index, ic_index)]
birlesmis_index

[('A', 'a1'), ('A', 'a2'), ('A', 'a3'), ('B', 'b1'), ('B', 'b2'), ('B', 'b3')]

In [29]:
# Turn the (outer, inner) tuples into a two-level MultiIndex.
multiIndex = pd.MultiIndex.from_tuples(birlesmis_index)
multiIndex

MultiIndex([('A', 'a1'),
            ('A', 'a2'),
            ('A', 'a3'),
            ('B', 'b1'),
            ('B', 'b2'),
            ('B', 'b3')],
           )

In [30]:
# One [Age, Experience] pair per (outer, inner) index entry, in the same order as multiIndex.
my_list = [
    [40, 6],
    [24, 2],
    [52, 10],
    [15, 1],
    [63, 15],
    [35, 5],
]
my_df = pd.DataFrame(data=my_list, columns=["Age", "Experience"], index=multiIndex)
my_df

Unnamed: 0,Unnamed: 1,Age,Experience
A,a1,40,6
A,a2,24,2
A,a3,52,10
B,b1,15,1
B,b2,63,15
B,b3,35,5


## Operations

In [31]:
# Four readings per city; `missing` (NaN) marks a reading that is not available.
missing = np.nan
weather_dict = {
    "Istanbul": [30, 29, missing, 33],
    "Ankara": [missing, missing, 25, 23],
    "Izmır": [40, 39, 38, 37],
    "Kocaeli": [missing, 28, 29, 30],
}

weather_df = pd.DataFrame(data=weather_dict)
weather_df

Unnamed: 0,Istanbul,Ankara,Izmır,Kocaeli
0,30.0,,40,
1,29.0,,39,28.0
2,,25.0,38,29.0
3,33.0,23.0,37,30.0


In [32]:
# Working with missing data
# With its defaults, dropna() removes every row that contains at least one NaN.
weather_df.dropna()

Unnamed: 0,Istanbul,Ankara,Izmır,Kocaeli
3,33.0,23.0,37,30.0


In [33]:
# axis=1 switches to columns: every column that contains at least one NaN is removed.
weather_df.dropna(axis=1)

Unnamed: 0,Izmır
0,40
1,39
2,38
3,37


In [34]:
# Works column-wise: thresh=3 keeps a column only if it has at least 3 non-NaN values.
# Ankara has just 2 non-NaN values out of 4, so it is the only column removed.
weather_df.dropna(axis = 1, thresh=3)

Unnamed: 0,Istanbul,Izmır,Kocaeli
0,30.0,40,
1,29.0,39,28.0
2,,38,29.0
3,33.0,37,30.0


In [35]:
# Replace every NaN with the constant 20.
weather_df.fillna(20)

Unnamed: 0,Istanbul,Ankara,Izmır,Kocaeli
0,30.0,20.0,40,20.0
1,29.0,20.0,39,28.0
2,20.0,25.0,38,29.0
3,33.0,23.0,37,30.0


### Groupby

In [36]:
# One entry per employee across three parallel lists: department, name and salary.
maas_dict = dict(
    department=["Yazılım", "Yazılım", "Pazarlama", "Hukuk", "Hukuk", "Pazarlama"],
    name=["Ahmet", "Ali", "Ayşe", "Hakan", "Kemal", "Fatma"],
    salary=[200, 300, 100, 150, 200, 150],
)

maas_df = pd.DataFrame(data=maas_dict)
maas_df

Unnamed: 0,department,name,salary
0,Yazılım,Ahmet,200
1,Yazılım,Ali,300
2,Pazarlama,Ayşe,100
3,Hukuk,Hakan,150
4,Hukuk,Kemal,200
5,Pazarlama,Fatma,150


In [37]:
# Group the rows by department; the aggregations in the following cells run once per group.
group = maas_df.groupby("department")

In [38]:
# Number of non-null entries in each column, per department.
group.count()

Unnamed: 0_level_0,name,salary
department,Unnamed: 1_level_1,Unnamed: 2_level_1
Hukuk,2,2
Pazarlama,2,2
Yazılım,2,2


In [39]:
# Per-department maximum of every column; for the string column "name" the comparison is
# by string order, so the result is the name that sorts last.
group.max()

Unnamed: 0_level_0,name,salary
department,Unnamed: 1_level_1,Unnamed: 2_level_1
Hukuk,Kemal,200
Pazarlama,Fatma,150
Yazılım,Ali,300


In [40]:
# Average salary per department; numeric_only=True leaves the non-numeric "name" column out.
group.mean(numeric_only=True)

Unnamed: 0_level_0,salary
department,Unnamed: 1_level_1
Hukuk,175.0
Pazarlama,125.0
Yazılım,250.0


In [41]:
# Summary statistics (count, mean, std, min, quartiles, max) of salary for each department.
group.describe()

Unnamed: 0_level_0,salary,salary,salary,salary,salary,salary,salary,salary
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
department,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Hukuk,2.0,175.0,35.355339,150.0,162.5,175.0,187.5,200.0
Pazarlama,2.0,125.0,35.355339,100.0,112.5,125.0,137.5,150.0
Yazılım,2.0,250.0,70.710678,200.0,225.0,250.0,275.0,300.0


### Concat

In [42]:
# Two record sets with identical columns ...
dict_1 = {
    "Isım": ["Ahmet", "Mehmet", "Zeynep", "Veli"],
    "Spor": ["Koşu", "Yüzme", "Koşu", "Basketbol"],
    "Kalori": [100, 200, 300, 400],
}
dict_2 = {
    "Isım": ["Mahmut", "Kerim", "Ayşe", "Ozan"],
    "Spor": ["Futbol", "Basketbol", "Tenis", "Voleybol"],
    "Kalori": [200, 300, 100, 200],
}

# ... and non-overlapping index labels (0-3 and 4-7).
df_1 = pd.DataFrame(data=dict_1, index=[0, 1, 2, 3])
df_2 = pd.DataFrame(data=dict_2, index=[4, 5, 6, 7])

In [43]:
# First frame: index labels 0-3.
df_1

Unnamed: 0,Isım,Spor,Kalori
0,Ahmet,Koşu,100
1,Mehmet,Yüzme,200
2,Zeynep,Koşu,300
3,Veli,Basketbol,400


In [44]:
# Second frame: same columns, index labels 4-7.
df_2

Unnamed: 0,Isım,Spor,Kalori
4,Mahmut,Futbol,200
5,Kerim,Basketbol,300
6,Ayşe,Tenis,100
7,Ozan,Voleybol,200


In [45]:
# Concatenation: stack the two frames on top of each other; each row keeps its own index label.
df_concat = pd.concat([df_1, df_2])
df_concat

Unnamed: 0,Isım,Spor,Kalori
0,Ahmet,Koşu,100
1,Mehmet,Yüzme,200
2,Zeynep,Koşu,300
3,Veli,Basketbol,400
4,Mahmut,Futbol,200
5,Kerim,Basketbol,300
6,Ayşe,Tenis,100
7,Ozan,Voleybol,200


### Merge

In [46]:
# Two frames that share the key column "Isım" but carry different attributes.
dict_1 = {"Isım": ["a", "b", "c", "d"], "Spor": ["top", "koş", "yüz", "atla"]}
dict_2 = {"Isım": ["a", "b", "c", "d"], "Kalori": [5, 10, 15, 20]}

df1 = pd.DataFrame(data=dict_1)
df2 = pd.DataFrame(data=dict_2)

In [47]:
# Left frame: Isım -> Spor.
df1

Unnamed: 0,Isım,Spor
0,a,top
1,b,koş
2,c,yüz
3,d,atla


In [48]:
# Right frame: Isım -> Kalori.
df2

Unnamed: 0,Isım,Kalori
0,a,5
1,b,10
2,c,15
3,d,20


In [49]:
# Join the two frames on their shared "Isım" column so each row carries both Spor and Kalori.
merge_df = pd.merge(left=df1, right=df2, on="Isım")
merge_df

Unnamed: 0,Isım,Spor,Kalori
0,a,top,5
1,b,koş,10
2,c,yüz,15
3,d,atla,20


In [50]:
# Distinct department values, in order of first appearance.
maas_df["department"].unique()

array(['Yazılım', 'Pazarlama', 'Hukuk'], dtype=object)

In [51]:
# How many distinct departments there are.
maas_df["department"].nunique()

3

In [52]:
# Number of rows for each department.
maas_df["department"].value_counts()

department
Yazılım      2
Pazarlama    2
Hukuk        2
Name: count, dtype: int64

In [53]:
# Pull out the salary column as a Series (bracket access, as used in the other cells).
maas_df["salary"]

0    200
1    300
2    100
3    150
4    200
5    150
Name: salary, dtype: int64

In [54]:
def func(maas, rate=0.66):
    """Scale a salary by a fixed rate.

    Args:
        maas: Salary amount (a number, or any value that supports ``* float``).
        rate: Multiplier applied to the salary. Defaults to 0.66, the value
            that was previously hard-coded, so existing one-argument calls
            behave exactly as before.

    Returns:
        ``maas * rate``.
    """
    return maas * rate

# apply: call func on each salary value and collect the results in a new Series
maas_df["salary"].apply(func)

0    132.0
1    198.0
2     66.0
3     99.0
4    132.0
5     99.0
Name: salary, dtype: float64

In [55]:
# Count the missing values in each column (isnull marks them, sum adds them up per column).
maas_df.isnull().sum()

department    0
name          0
salary        0
dtype: int64

In [56]:
# Structural summary (index range, columns, non-null counts, dtypes, memory usage).
# Note: `df` is the 4x3 frame built from the random array earlier, not maas_df.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       4 non-null      float64
 1   1       4 non-null      float64
 2   2       4 non-null      float64
dtypes: float64(3)
memory usage: 228.0 bytes


In [57]:
# Pivot on (department, name): salary is aggregated per pair with the default aggfunc (mean),
# which is why the values come back as floats.
maas_df.pivot_table(values="salary", index=["department", "name"])

Unnamed: 0_level_0,Unnamed: 1_level_0,salary
department,name,Unnamed: 2_level_1
Hukuk,Hakan,150.0
Hukuk,Kemal,200.0
Pazarlama,Ayşe,100.0
Pazarlama,Fatma,150.0
Yazılım,Ahmet,200.0
Yazılım,Ali,300.0


In [58]:
# Reading data from CSV / Excel files
#pd.read_csv("")
#pd.read_excel("")

# Saving
#df.to_excel("a.xlsx")
#df.to_csv("a.csv")