In [1]:
import numpy as np
import pandas as pd

In [2]:
pd.__version__

'2.1.4'

In [3]:
pd.Series?

In [4]:
series = pd.Series([1000, "Ali", 33.22, np.nan, True])
series

0     1000
1      Ali
2    33.22
3      NaN
4     True
dtype: object

In [5]:
#List the index of the series
series.index

RangeIndex(start=0, stop=5, step=1)

In [6]:
#List the values (column) of the series
print(series.values)

[1000 'Ali' 33.22 nan True]


In [7]:
#possible to change the index lables of a pandas Series object
series.index = ['a', 'b', 'c', 'd', 'e']
series

a     1000
b      Ali
c    33.22
d      NaN
e     True
dtype: object

In [8]:
list(series.index)

['a', 'b', 'c', 'd', 'e']

In [9]:
#The passed index is a list of axis labels.
#Thus, this separates into a few cases depending on what data is:
series = pd.Series([1000, 20000, 3000], index={'Ali', 'Veli', 'Ayse'})
print(series)

Ayse     1000
Veli    20000
Ali      3000
dtype: int64


In [10]:
#Series can be instantiated from dicts:
dic = {'Ali': 67, 'Veli': 88, 'Ayse':94, 'Can':34, 'Zeynep': 45}
series = pd.Series(dic)
series

Ali       67
Veli      88
Ayse      94
Can       34
Zeynep    45
dtype: int64

* Series acts very similarly to a ndarray, and is a valid argument to most NumPy functions. However, operations such as slicing will also slice the index.

In [11]:
#display index
series[0]

  series[0]


67

In [12]:
series[-1]

  series[-1]


45

In [13]:
series[-3:]

Ayse      94
Can       34
Zeynep    45
dtype: int64

In [14]:
series[0:2]

Ali     67
Veli    88
dtype: int64

    * We will addresses array-based indexing like series [[3, 1, 0]] in selection on indexing

In [15]:
series[[3, 1, 0]]

  series[[3, 1, 0]]


Can     34
Veli    88
Ali     67
dtype: int64

In [16]:
series['Veli']

88

In [17]:
series[['Veli', 'Can']]

Veli    88
Can     34
dtype: int64

In [18]:
series['Ali':'Can']

Ali     67
Veli    88
Ayse    94
Can     34
dtype: int64

In [19]:
series['Veli':'Can'] = 89
series

Ali       67
Veli      89
Ayse      89
Can       89
Zeynep    45
dtype: int64

In [20]:
series['Ayse'] = 98
series

Ali       67
Veli      89
Ayse      98
Can       89
Zeynep    45
dtype: int64

In [21]:
series = pd.Series([1000, 'Ali', 33.22, np.nan, True])
series

0     1000
1      Ali
2    33.22
3      NaN
4     True
dtype: object

isna() and isnull() both do not detect empty strings. "So you need to apply different functions to treat empty strings."

In [22]:
series.isna()

0    False
1    False
2    False
3     True
4    False
dtype: bool

In [23]:
series.isnull()

0    False
1    False
2    False
3     True
4    False
dtype: bool

In [24]:
dic = {'Ali': 67, 'Veli': 88, 'Ayse':94, 'Can':34, 'Zeynep': 45, 'Zehra': 74}
series = pd.Series(dic)
series

Ali       67
Veli      88
Ayse      94
Can       34
Zeynep    45
Zehra     74
dtype: int64

In [25]:
#Return the Largest 'n' elements, default n = 5
series.nlargest()
series[3]

  series[3]


34

In [26]:
series.nlargest(3)

Ayse     94
Veli     88
Zehra    74
dtype: int64

In [27]:
# can work with NumPy arrays
longSeries = pd.Series(np.random.randn(1000))
longSeries

0      1.471407
1      0.820035
2     -1.075878
3     -1.541317
4      1.352578
         ...   
995    0.740084
996   -2.275836
997    1.809179
998   -0.225003
999   -0.143215
Length: 1000, dtype: float64

In [28]:
longSeries.head()

0    1.471407
1    0.820035
2   -1.075878
3   -1.541317
4    1.352578
dtype: float64

In [29]:
longSeries.head(10)

0    1.471407
1    0.820035
2   -1.075878
3   -1.541317
4    1.352578
5   -0.944411
6    1.012554
7    1.348635
8   -0.141414
9    2.100011
dtype: float64

In [30]:
longSeries.tail(3)

997    1.809179
998   -0.225003
999   -0.143215
dtype: float64

# Data Frame 

In [31]:
s1 = pd.Series(np.random.randn(4))
s2 = pd.Series(np.random.randn(5))
dic = {'one': s1, 'two': s2}
df = pd.DataFrame(dic)
df

Unnamed: 0,one,two
0,2.387379,1.186236
1,0.525966,0.140812
2,0.672092,-0.469185
3,-1.290118,-0.135606
4,,0.092139


In [32]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [33]:
df.values

array([[ 2.38737871,  1.1862358 ],
       [ 0.52596606,  0.14081247],
       [ 0.6720923 , -0.46918459],
       [-1.29011794, -0.13560611],
       [        nan,  0.09213942]])

In [34]:
s1 = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s2 = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd'])
dic = {'one': s1, 'two': s2}
df = pd.DataFrame(dic)
df

Unnamed: 0,one,two
a,0.559455,-0.325588
b,-1.870126,0.669804
c,1.495279,1.208667
d,0.090996,-0.059892
e,2.180371,


In [35]:
df.columns

Index(['one', 'two'], dtype='object')

In [36]:
data = {
    'Brand': ['Ford', 'Maserati', 'BMW', 'Ford', 'Lamborghini', 'Ferrari'],
    'Electric': [False, True, False, True, True, False],
    'Year': [1964, 1970, 1980, 1970, 1969, 1980],
    'Price': [370.5, 234.6, 210.03, 347.9, 543.7, 784.1],
    'Colors': ['red', 'white', 'black', 'white', 'blue', 'red']
}


In [37]:
data

{'Brand': ['Ford', 'Maserati', 'BMW', 'Ford', 'Lamborghini', 'Ferrari'],
 'Electric': [False, True, False, True, True, False],
 'Year': [1964, 1970, 1980, 1970, 1969, 1980],
 'Price': [370.5, 234.6, 210.03, 347.9, 543.7, 784.1],
 'Colors': ['red', 'white', 'black', 'white', 'blue', 'red']}

In [38]:
df = pd.DataFrame(data)
df

Unnamed: 0,Brand,Electric,Year,Price,Colors
0,Ford,False,1964,370.5,red
1,Maserati,True,1970,234.6,white
2,BMW,False,1980,210.03,black
3,Ford,True,1970,347.9,white
4,Lamborghini,True,1969,543.7,blue
5,Ferrari,False,1980,784.1,red


In [39]:
df.head(2)

Unnamed: 0,Brand,Electric,Year,Price,Colors
0,Ford,False,1964,370.5,red
1,Maserati,True,1970,234.6,white


In [40]:
newCol = ['electric', 'hybrid', 'petrol', 'hybrid', 'petrol', 'electric']
df['Fuel'] = newCol
df

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
0,Ford,False,1964,370.5,red,electric
1,Maserati,True,1970,234.6,white,hybrid
2,BMW,False,1980,210.03,black,petrol
3,Ford,True,1970,347.9,white,hybrid
4,Lamborghini,True,1969,543.7,blue,petrol
5,Ferrari,False,1980,784.1,red,electric


In [41]:
pd.DataFrame(data, index=['one', 'two', 'three', 'four', 'five', 'six'])


Unnamed: 0,Brand,Electric,Year,Price,Colors
one,Ford,False,1964,370.5,red
two,Maserati,True,1970,234.6,white
three,BMW,False,1980,210.03,black
four,Ford,True,1970,347.9,white
five,Lamborghini,True,1969,543.7,blue
six,Ferrari,False,1980,784.1,red


In [42]:
df.Year

0    1964
1    1970
2    1980
3    1970
4    1969
5    1980
Name: Year, dtype: int64

In [43]:
df[1:3]

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
1,Maserati,True,1970,234.6,white,hybrid
2,BMW,False,1980,210.03,black,petrol


In [44]:
df['Brand']

0           Ford
1       Maserati
2            BMW
3           Ford
4    Lamborghini
5        Ferrari
Name: Brand, dtype: object

In [45]:
df[['Brand', 'Year']]

Unnamed: 0,Brand,Year
0,Ford,1964
1,Maserati,1970
2,BMW,1980
3,Ford,1970
4,Lamborghini,1969
5,Ferrari,1980


In [46]:
scores={
    'Math': {'Ali': 85, 'Berk': 90, 'Can':95},
    'Physics': {'Ali': 90, 'Berk': 80, 'Can':75}
}
scores

{'Math': {'Ali': 85, 'Berk': 90, 'Can': 95},
 'Physics': {'Ali': 90, 'Berk': 80, 'Can': 75}}

In [47]:
dfScores = pd.DataFrame(scores)
dfScores

Unnamed: 0,Math,Physics
Ali,85,90
Berk,90,80
Can,95,75


In [48]:
dfScores.index.name='Name'
dfScores

Unnamed: 0_level_0,Math,Physics
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,85,90
Berk,90,80
Can,95,75


In [49]:
dfScores.columns

Index(['Math', 'Physics'], dtype='object')

In [50]:
dfScores.columns.name='Lesson'
dfScores

Lesson,Math,Physics
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,85,90
Berk,90,80
Can,95,75


In [51]:
dfScores1 = dfScores.reindex(['Berk', 'Ali', 'Can'])
dfScores1

Lesson,Math,Physics
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Berk,90,80
Ali,85,90
Can,95,75


In [52]:
lessons=['Physics', 'Math']
dfScores.reindex(columns=lessons)

Lesson,Physics,Math
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,90,85
Berk,80,90
Can,75,95


In [53]:
series

Ali       67
Veli      88
Ayse      94
Can       34
Zeynep    45
Zehra     74
dtype: int64

In [54]:
series.iloc[0]

67

In [55]:
series.iloc[[False,True,False,True,True,False]]

Veli      88
Can       34
Zeynep    45
dtype: int64

In [56]:
df

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
0,Ford,False,1964,370.5,red,electric
1,Maserati,True,1970,234.6,white,hybrid
2,BMW,False,1980,210.03,black,petrol
3,Ford,True,1970,347.9,white,hybrid
4,Lamborghini,True,1969,543.7,blue,petrol
5,Ferrari,False,1980,784.1,red,electric


In [57]:
df.iloc[1]

Brand       Maserati
Electric        True
Year            1970
Price          234.6
Colors         white
Fuel          hybrid
Name: 1, dtype: object

In [58]:
df.iloc[1:3]

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
1,Maserati,True,1970,234.6,white,hybrid
2,BMW,False,1980,210.03,black,petrol


In [59]:
df.iloc[[0,3]]

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
0,Ford,False,1964,370.5,red,electric
3,Ford,True,1970,347.9,white,hybrid


In [60]:
df.iloc[[True, False, True, False, True, False]]

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
0,Ford,False,1964,370.5,red,electric
2,BMW,False,1980,210.03,black,petrol
4,Lamborghini,True,1969,543.7,blue,petrol


In [61]:
df.iloc[lambda x: x.index % 2 == 0]

Unnamed: 0,Brand,Electric,Year,Price,Colors,Fuel
0,Ford,False,1964,370.5,red,electric
2,BMW,False,1980,210.03,black,petrol
4,Lamborghini,True,1969,543.7,blue,petrol


In [62]:
df.iloc[1,3]

234.6

In [63]:
df.iloc[[1,2], [0,3]]

Unnamed: 0,Brand,Price
1,Maserati,234.6
2,BMW,210.03


In [64]:
df.iloc[1:4, 0:3]

Unnamed: 0,Brand,Electric,Year
1,Maserati,True,1970
2,BMW,False,1980
3,Ford,True,1970


In [65]:
df.iloc[1, [0,3]]

Brand    Maserati
Price       234.6
Name: 1, dtype: object

In [66]:
df.iloc[:, [True, True, True, False, False, True]]

Unnamed: 0,Brand,Electric,Year,Fuel
0,Ford,False,1964,electric
1,Maserati,True,1970,hybrid
2,BMW,False,1980,petrol
3,Ford,True,1970,hybrid
4,Lamborghini,True,1969,petrol
5,Ferrari,False,1980,electric


In [67]:
df.iloc[:, lambda df: [0, 2]]

Unnamed: 0,Brand,Year
0,Ford,1964
1,Maserati,1970
2,BMW,1980
3,Ford,1970
4,Lamborghini,1969
5,Ferrari,1980


In [68]:
series

Ali       67
Veli      88
Ayse      94
Can       34
Zeynep    45
Zehra     74
dtype: int64

In [69]:
series.loc['Ali']

67

In [70]:
series.loc[: 'Ayse']

Ali     67
Veli    88
Ayse    94
dtype: int64

In [71]:
series.loc['Ali' :]

Ali       67
Veli      88
Ayse      94
Can       34
Zeynep    45
Zehra     74
dtype: int64

In [72]:
series.loc[['Ali', 'Can']]

Ali    67
Can    34
dtype: int64

In [73]:
df.loc[1]

Brand       Maserati
Electric        True
Year            1970
Price          234.6
Colors         white
Fuel          hybrid
Name: 1, dtype: object

In [74]:
df=pd.DataFrame(data, columns=['Brand', 'Year', 'Colors', 'Price'],
               index=['one', 'two', 'three', 'four', 'five', 'six'])

In [75]:
df.loc['one', 'Colors']

'red'

In [76]:
df.loc[['one', 'three']]

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1964,red,370.5
three,BMW,1980,black,210.03


In [77]:
'Ali' in series

True

In [78]:
'Year' in df

True

In [79]:
'one' in df

False

In [80]:
df

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1964,red,370.5
two,Maserati,1970,white,234.6
three,BMW,1980,black,210.03
four,Ford,1970,white,347.9
five,Lamborghini,1969,blue,543.7
six,Ferrari,1980,red,784.1


In [81]:
df.loc[pd.Series([True, True, False, False, True, True],
                index=['one', 'two', 'three', 'four', 'five', 'six'])]

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1964,red,370.5
two,Maserati,1970,white,234.6
five,Lamborghini,1969,blue,543.7
six,Ferrari,1980,red,784.1


In [82]:
df.loc[df['Price'] > 250]

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1964,red,370.5
four,Ford,1970,white,347.9
five,Lamborghini,1969,blue,543.7
six,Ferrari,1980,red,784.1


In [83]:
df.loc[df['Price'] > 250, ['Colors']]

Unnamed: 0,Colors
one,red
four,white
five,blue
six,red


In [84]:
df.loc[['one', 'four'], ['Year']] = 1960
df

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1960,red,370.5
two,Maserati,1970,white,234.6
three,BMW,1980,black,210.03
four,Ford,1960,white,347.9
five,Lamborghini,1969,blue,543.7
six,Ferrari,1980,red,784.1


## Querying a DataFrame

In [85]:
df

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1960,red,370.5
two,Maserati,1970,white,234.6
three,BMW,1980,black,210.03
four,Ford,1960,white,347.9
five,Lamborghini,1969,blue,543.7
six,Ferrari,1980,red,784.1


In [86]:
df['Price'] > 250

one       True
two      False
three    False
four      True
five      True
six       True
Name: Price, dtype: bool

In [87]:
df[df['Price'] > 250]

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1960,red,370.5
four,Ford,1960,white,347.9
five,Lamborghini,1969,blue,543.7
six,Ferrari,1980,red,784.1


In [88]:
tmpDf = df.where(df['Price'] > 250)
tmpDf.head(3)

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1960.0,red,370.5
two,,,,
three,,,,


In [89]:
tmpDf

Unnamed: 0,Brand,Year,Colors,Price
one,Ford,1960.0,red,370.5
two,,,,
three,,,,
four,Ford,1960.0,white,347.9
five,Lamborghini,1969.0,blue,543.7
six,Ferrari,1980.0,red,784.1
