# PANDAS

In [1]:
import pandas as pd

## Creating Series

In [6]:
a = pd.Series([5,10,15,20,25])

In [7]:
type(a)

pandas.core.series.Series

In [9]:
a.axes

[RangeIndex(start=0, stop=5, step=1)]

In [10]:
a.dtype

dtype('int64')

In [11]:
a.size

5

In [12]:
a.shape

(5,)

In [13]:
a.ndim

1

In [14]:
a.values

array([ 5, 10, 15, 20, 25])

In [16]:
a.head

<bound method NDFrame.head of 0     5
1    10
2    15
3    20
4    25
dtype: int64>

In [18]:
a.head(2)

0     5
1    10
dtype: int64

In [20]:
a.tail(2)

3    20
4    25
dtype: int64

In [22]:
# index naming
b = pd.Series([99,88,77,66,55], index = [9,8,7,6,5])
b

9    99
8    88
7    77
6    66
5    55
dtype: int64

In [23]:
b[7]

77

In [32]:
dict = {"reg":10,"log":11,"cart":12}
d = pd.Series(dict)
d

reg     10
log     11
cart    12
dtype: int64

In [33]:
pd.concat([d,d])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [38]:
d.index

Index(['reg', 'log', 'cart'], dtype='object')

In [40]:
d.keys

<bound method Series.keys of reg     10
log     11
cart    12
dtype: int64>

In [41]:
list(d.items())

[('reg', 10), ('log', 11), ('cart', 12)]

In [42]:
d.values

array([10, 11, 12])

In [43]:
"reg" in d

True

In [44]:
5 in d

False

In [47]:
d[["log","cart"]]

log     11
cart    12
dtype: int64

In [48]:
d["log"] = 130
d

reg      10
log     130
cart     12
dtype: int64

In [49]:
d["reg":"log"]

reg     10
log    130
dtype: int64

## DataFrame

In [50]:
import pandas as pd

In [52]:
l = [1,2,36,78,90]
l

[1, 2, 36, 78, 90]

In [53]:
pd.DataFrame(l, columns = ["variable_name"])

Unnamed: 0,variable_name
0,1
1,2
2,36
3,78
4,90


In [58]:
import numpy as np
m = np.arange(1,10).reshape(3,3)

In [90]:
df = pd.DataFrame(m, columns = ["deg1","deg2","deg3"])
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [91]:
 # naming df

In [92]:
df = pd.DataFrame(m, columns = ["deg1","deg2","deg3"])
df.head()

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [93]:
df.columns = ("var1","var2","var3")
df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [72]:
type(df)

pandas.core.frame.DataFrame

In [73]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['var1', 'var2', 'var3'], dtype='object')]

In [74]:
df.tail(2)

Unnamed: 0,var1,var2,var3
1,4,5,6
2,7,8,9


In [76]:
df.shape

(3, 3)

In [77]:
df.ndim

2

In [78]:
df.size

9

In [79]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [80]:
type(df.values)

numpy.ndarray

**drop() : deletes but makes no changes.**

In [94]:
#delete
df.drop(1)

Unnamed: 0,var1,var2,var3
0,1,2,3
2,7,8,9


In [95]:
df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


**drop(inplace = True) : makes changes**

In [96]:
df.drop(1, inplace = True)

In [97]:
df

Unnamed: 0,var1,var2,var3
0,1,2,3
2,7,8,9


## Selection of Variables and Observations : loc & iloc

In [98]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10,3))
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df

Unnamed: 0,var1,var2,var3
0,6,15,24
1,4,11,2
2,24,14,19
3,12,17,25
4,12,15,9
5,13,27,10
6,10,24,16
7,5,10,27
8,24,6,22
9,12,14,5


 - **loc : tanımlandığı şekli ile seçim yapmak için kullanılır.**

In [99]:
df.loc[1:3]

Unnamed: 0,var1,var2,var3
1,4,11,2
2,24,14,19
3,12,17,25


- **iloc: alışık olduğumuz indeksleme mantığı ile seçim yapar.**

In [100]:
df.iloc[1:3]

Unnamed: 0,var1,var2,var3
1,4,11,2
2,24,14,19


In [102]:
df.loc[1:3,"var3"]

1     2
2    19
3    25
Name: var3, dtype: int64

In [104]:
df.iloc[1:3,"var3"] #error

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types

In [105]:
df.iloc[1:3]["var3"]

1     2
2    19
Name: var3, dtype: int64

## Conditional Element Operations

In [109]:
df

Unnamed: 0,var1,var2,var3
0,6,15,24
1,4,11,2
2,24,14,19
3,12,17,25
4,12,15,9
5,13,27,10
6,10,24,16
7,5,10,27
8,24,6,22
9,12,14,5


In [106]:
df.var1

0     6
1     4
2    24
3    12
4    12
5    13
6    10
7     5
8    24
9    12
Name: var1, dtype: int64

In [107]:
df[df.var1 > 15]

Unnamed: 0,var1,var2,var3
2,24,14,19
8,24,6,22


In [108]:
df[df.var1 > 15]["var1"]

2    24
8    24
Name: var1, dtype: int64

In [113]:
df[(df.var1 > 5) & (df.var3 < 10)]

Unnamed: 0,var1,var2,var3
4,12,15,9
9,12,14,5


## Join Operations

In [117]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (5,3))
df1 = pd.DataFrame(m, columns = ["var1","var2","var3"])
df1

Unnamed: 0,var1,var2,var3
0,17,9,9
1,9,20,28
2,2,14,6
3,12,12,20
4,13,21,24


In [118]:
df2 = df1 + 88
df2

Unnamed: 0,var1,var2,var3
0,105,97,97
1,97,108,116
2,90,102,94
3,100,100,108
4,101,109,112


In [120]:
pd.concat([df1,df2]) #indexler sıralı değil

Unnamed: 0,var1,var2,var3
0,17,9,9
1,9,20,28
2,2,14,6
3,12,12,20
4,13,21,24
0,105,97,97
1,97,108,116
2,90,102,94
3,100,100,108
4,101,109,112


In [121]:
pd.concat([df1,df2], ignore_index = True) #indexler sıralı

Unnamed: 0,var1,var2,var3
0,17,9,9
1,9,20,28
2,2,14,6
3,12,12,20
4,13,21,24
5,105,97,97
6,97,108,116
7,90,102,94
8,100,100,108
9,101,109,112


In [122]:
df1.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [123]:
df2.columns = ["var1","var2","deg3"]
df2

Unnamed: 0,var1,var2,deg3
0,105,97,97
1,97,108,116
2,90,102,94
3,100,100,108
4,101,109,112


In [124]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3,deg3
0,17,9,9.0,
1,9,20,28.0,
2,2,14,6.0,
3,12,12,20.0,
4,13,21,24.0,
0,105,97,,97.0
1,97,108,,116.0
2,90,102,,94.0
3,100,100,,108.0
4,101,109,,112.0


In [125]:
pd.concat([df1, df2], join = "inner") #kesişimleri işleme sokar

Unnamed: 0,var1,var2
0,17,9
1,9,20
2,2,14
3,12,12
4,13,21
0,105,97
1,97,108
2,90,102
3,100,100
4,101,109


## Advanced Join Operations

In [128]:
df1 = pd.DataFrame({"employees" : ["Toby","Andrew","Tom"],
                   "group" : ["IT","HR","Engineer"]})
df1

Unnamed: 0,employees,group
0,Toby,IT
1,Andrew,HR
2,Tom,Engineer


In [129]:
df2 = pd.DataFrame({"employees" : ["Toby","Andrew","Tom"],
                   "first_year" : ["1999","2005","2018"]})
df2

Unnamed: 0,employees,first_year
0,Toby,1999
1,Andrew,2005
2,Tom,2018


In [130]:
pd.merge(df1,df2)

Unnamed: 0,employees,group,first_year
0,Toby,IT,1999
1,Andrew,HR,2005
2,Tom,Engineer,2018


In [131]:
pd.merge(df1,df2, on = "employees")

Unnamed: 0,employees,group,first_year
0,Toby,IT,1999
1,Andrew,HR,2005
2,Tom,Engineer,2018


In [133]:
df3 = pd.merge(df1,df2)
df3

Unnamed: 0,employees,group,first_year
0,Toby,IT,1999
1,Andrew,HR,2005
2,Tom,Engineer,2018


In [134]:
df4 = pd.DataFrame({"employees" : ["Toby","Andrew","Tom"],
                   "boss" : ["Robert","Brad","Chris"]})
df4

Unnamed: 0,employees,boss
0,Toby,Robert
1,Andrew,Brad
2,Tom,Chris


In [135]:
pd.merge(df3,df4)

Unnamed: 0,employees,group,first_year,boss
0,Toby,IT,1999,Robert
1,Andrew,HR,2005,Brad
2,Tom,Engineer,2018,Chris


In [139]:
df5 = pd.DataFrame({"group" : ["IT","IT","HR","Engineer","Engineer"],
                   "skills": ["software","linux","excel","math","design"]})
df5

Unnamed: 0,group,skills
0,IT,software
1,IT,linux
2,HR,excel
3,Engineer,math
4,Engineer,design


In [140]:
df1

Unnamed: 0,employees,group
0,Toby,IT
1,Andrew,HR
2,Tom,Engineer


In [141]:
pd.merge(df1,df5)

Unnamed: 0,employees,group,skills
0,Toby,IT,software
1,Toby,IT,linux
2,Andrew,HR,excel
3,Tom,Engineer,math
4,Tom,Engineer,design
