# PANDAS DATAFRAME

In [1]:
import pandas as pd

In [2]:
cars = pd.read_csv("export_dataframe.csv")
cars

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [4]:
cars.describe()

Unnamed: 0,Price
count,4.0
mean,27250.0
std,5560.275773
min,22000.0
25%,24250.0
50%,26000.0
75%,29000.0
max,35000.0


In [5]:
cars.tail(3)

Unnamed: 0,Brand,Price
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [6]:
cars.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
cars.values

array([['Honda Civic', 22000],
       ['Toyota Corolla', 25000],
       ['Ford Focus', 27000],
       ['Audi A4', 35000]], dtype=object)

In [8]:
cars.shape

(4, 2)

In [9]:
cars.columns

Index(['Brand', 'Price'], dtype='object')

In [10]:
cars.axes

[RangeIndex(start=0, stop=4, step=1),
 Index(['Brand', 'Price'], dtype='object')]

In [12]:
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Brand   4 non-null      object
 1   Price   4 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 192.0+ bytes


In [13]:
cars = pd.read_csv("export_dataframe.csv",index_col="Brand")
cars


Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Honda Civic,22000
Toyota Corolla,25000
Ford Focus,27000
Audi A4,35000


## Değişiklik Yapma

In [124]:
cars = pd.read_csv("export_dataframe.csv")
cars

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [23]:
cars[["Brand"]].head()

Unnamed: 0,Brand
0,Honda Civic
1,Toyota Corolla
2,Ford Focus
3,Audi A4


#### Yeni Sütun Ekleme

In [24]:
cars["AverageRate"] = 5
cars

Unnamed: 0,Brand,Price,AverageRate
0,Honda Civic,22000,5
1,Toyota Corolla,25000,5
2,Ford Focus,27000,5
3,Audi A4,35000,5


In [31]:
for i in cars[["Brand","AverageRate"]].values:
    print("{} {}".format(i[0],i[1]))

Honda Civic 4
Toyota Corolla 4
Ford Focus 4
Audi A4 5


In [126]:
cars["AverageRate"] = [ 5 if i>30000 else 4 for i in cars["Price"].values]
cars

Unnamed: 0,Brand,Price,AverageRate
0,Honda Civic,22000,4
1,Toyota Corolla,25000,4
2,Ford Focus,27000,4
3,Audi A4,35000,5


In [36]:
cars.insert(2,column="good",
             value=[True if i==5 else False for i in cars["AverageRate"].values]
            )

ValueError: cannot insert good, already exists

In [35]:
cars

Unnamed: 0,Brand,Price,good,AverageRate
0,Honda Civic,22000,False,4
1,Toyota Corolla,25000,False,4
2,Ford Focus,27000,False,4
3,Audi A4,35000,True,5


In [44]:
cars["Price"] = [i/10000 for i in cars["Price"].values]

In [45]:
cars["Price"] *= 100
cars

Unnamed: 0,Brand,Price,good,AverageRate
0,Honda Civic,22000.0,False,4
1,Toyota Corolla,25000.0,False,4
2,Ford Focus,27000.0,False,4
3,Audi A4,35000.0,True,5


In [46]:
cars["TL"] = cars["Price"] * 7

In [47]:
cars

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000.0,False,4,154000.0
1,Toyota Corolla,25000.0,False,4,175000.0
2,Ford Focus,27000.0,False,4,189000.0
3,Audi A4,35000.0,True,5,245000.0


In [48]:
cars["AverageRate"].value_counts()

4    3
5    1
Name: AverageRate, dtype: int64

In [52]:
import numpy as np
np.unique(cars["AverageRate"].values)

array([4, 5])

## DROPNA , FIILNA , ASTYPE, RANK METODU 

In [56]:
cars.dropna(how="any",inplace=True) # 1 sütün silmek için yeterli
cars

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000.0,False,4,154000.0
1,Toyota Corolla,25000.0,False,4,175000.0
2,Ford Focus,27000.0,False,4,189000.0
3,Audi A4,35000.0,True,5,245000.0


In [57]:
cars.dropna(how="all") # Bütün sütunlar NaN olmalı silmek için

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000.0,False,4,154000.0
1,Toyota Corolla,25000.0,False,4,175000.0
2,Ford Focus,27000.0,False,4,189000.0
3,Audi A4,35000.0,True,5,245000.0


In [60]:
cars.dropna(subset=["Brand","Price"]) # Brand ve Price değeri NaN ise sil

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000.0,False,4,154000.0
1,Toyota Corolla,25000.0,False,4,175000.0
2,Ford Focus,27000.0,False,4,189000.0
3,Audi A4,35000.0,True,5,245000.0


In [63]:
cars.fillna(1) # Bütün NaN değerleri yerine 1 koy

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000.0,False,4,154000.0
1,Toyota Corolla,25000.0,False,4,175000.0
2,Ford Focus,27000.0,False,4,189000.0
3,Audi A4,35000.0,True,5,245000.0


In [70]:
cars["Price"].fillna(cars["Price"].mean(),inplace=True)
cars

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000.0,False,4,154000.0
1,Toyota Corolla,25000.0,False,4,175000.0
2,Ford Focus,5.0,False,4,189000.0
3,Audi A4,35000.0,True,5,245000.0


### ASTYPE 

In [71]:
cars.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Brand        4 non-null      object 
 1   Price        4 non-null      float64
 2   good         4 non-null      bool   
 3   AverageRate  4 non-null      int64  
 4   TL           4 non-null      float64
dtypes: bool(1), float64(2), int64(1), object(1)
memory usage: 324.0+ bytes


In [73]:
cars["Price"].fillna(0,inplace=True) # NaN değerler varsa değiştirme olmaz.

In [75]:
cars["Price"] = cars["Price"].astype("int") # floattan int e çevirildi.
cars

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000,False,4,154000.0
1,Toyota Corolla,25000,False,4,175000.0
2,Ford Focus,5,False,4,189000.0
3,Audi A4,35000,True,5,245000.0


In [79]:
cars.sort_values(["Price","AverageRate"],ascending=[False,True])

Unnamed: 0,Brand,Price,good,AverageRate,TL
3,Audi A4,35000,True,5,245000.0
1,Toyota Corolla,25000,False,4,175000.0
0,Honda Civic,22000,False,4,154000.0
2,Ford Focus,5,False,4,189000.0


In [81]:
cars.sort_index()

Unnamed: 0,Brand,Price,good,AverageRate,TL
0,Honda Civic,22000,False,4,154000.0
1,Toyota Corolla,25000,False,4,175000.0
2,Ford Focus,5,False,4,189000.0
3,Audi A4,35000,True,5,245000.0


In [86]:
np.sort(cars["Brand"].values)

array(['Audi A4', 'Ford Focus', 'Honda Civic', 'Toyota Corolla'],
      dtype=object)

In [87]:
cars.sort_index(ascending=False)

Unnamed: 0,Brand,Price,good,AverageRate,TL
3,Toyota Corolla,35000,True,5,245000.0
2,Honda Civic,5,False,4,189000.0
1,Ford Focus,25000,False,4,175000.0
0,Audi A4,22000,False,4,154000.0


In [91]:
cars["PriceRank"] = cars["Price"].rank(ascending=False).astype("int")
cars

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
0,Audi A4,22000,False,4,154000.0,3
1,Ford Focus,25000,False,4,175000.0,2
2,Honda Civic,5,False,4,189000.0,4
3,Toyota Corolla,35000,True,5,245000.0,1


## ISIN ,ISNULL,NOTNULL

In [95]:
cars["Brand"] == "Audi" 

0    False
1    False
2    False
3    False
Name: Brand, dtype: bool

### Filtrelemek

In [94]:
cars[cars["Price"]>30000]

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
3,Toyota Corolla,35000,True,5,245000.0,1


In [100]:
cars[cars["Brand"].isin(["Audi A4","Ford Focus"])]

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
0,Audi A4,22000,False,4,154000.0,3
1,Ford Focus,25000,False,4,175000.0,2


In [102]:
cars[cars["Price"].isnull()]

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank


In [103]:
cars[cars["Price"].notnull()]

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
0,Audi A4,22000,False,4,154000.0,3
1,Ford Focus,25000,False,4,175000.0,2
2,Honda Civic,5,False,4,189000.0,4
3,Toyota Corolla,35000,True,5,245000.0,1


### Between , Duplicated , Drop_duplicates , unique

In [108]:
cars["Price"].between(0,20000) # 0 ve 20000 dahil

0    False
1    False
2     True
3    False
Name: Price, dtype: bool

In [112]:
cars[cars["AverageRate"].duplicated(keep="last")] # duplicate olanları sonuncuyu atarak getir.

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
0,Audi A4,22000,False,4,154000.0,3
1,Ford Focus,25000,False,4,175000.0,2


In [113]:
cars.drop_duplicates(subset=["AverageRate"])

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
0,Audi A4,22000,False,4,154000.0,3
3,Toyota Corolla,35000,True,5,245000.0,1


In [114]:
cars

Unnamed: 0,Brand,Price,good,AverageRate,TL,PriceRank
0,Audi A4,22000,False,4,154000.0,3
1,Ford Focus,25000,False,4,175000.0,2
2,Honda Civic,5,False,4,189000.0,4
3,Toyota Corolla,35000,True,5,245000.0,1


In [116]:
cars["AverageRate"].unique().size

2

### SET_INDEX , LOC ,ILOC

In [120]:
cars.set_index("Brand",inplace=True)


KeyError: "None of ['Brand'] are in the columns"

In [125]:
cars

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [127]:
cars.reset_index(inplace=True)
cars

Unnamed: 0,index,Brand,Price,AverageRate
0,0,Honda Civic,22000,4
1,1,Toyota Corolla,25000,4
2,2,Ford Focus,27000,4
3,3,Audi A4,35000,5


In [164]:
cars = pd.read_csv("export_dataframe.csv",index_col="Brand")
cars

Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Honda Civic,22000
Toyota Corolla,25000
Ford Focus,27000
Audi A4,35000


In [144]:
cars.loc["Honda Civic"]

Price    22000
Name: Honda Civic, dtype: int64

In [147]:
cars.iloc[0:5]

Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Honda Civic,22000
Toyota Corolla,25000
Ford Focus,27000
Audi A4,35000


In [150]:
cars.iloc[0]

Price    22000
Name: Honda Civic, dtype: int64

In [152]:
cars.iloc[0,0]

22000

### YENİ DEĞER ATAMA ,RENAME , DROP ,POP

In [165]:
cars.iloc[3,0] = 10
cars

Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Honda Civic,22000
Toyota Corolla,25000
Ford Focus,27000
Audi A4,10


In [273]:
cars = pd.read_csv("export_dataframe.csv")
cars


Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [178]:
cars.rename(columns={"Brand":"Marka","Price":"Fiyat($)"})

Unnamed: 0,Marka,Fiyat($)
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [179]:
cars.drop(labels=["Price"],axis=1)

Unnamed: 0,Brand
0,Honda Civic
1,Toyota Corolla
2,Ford Focus
3,Audi A4


In [180]:
cars.pop("Price")

0    22000
1    25000
2    27000
3    35000
Name: Price, dtype: int64

In [182]:
cars.sample(2,axis=0)

Unnamed: 0,Brand
2,Ford Focus
0,Honda Civic


In [192]:
cars["Price"].sort_values()
cars.nsmallest(2,columns="Price")
cars.nlargest(2,columns="Price")

Unnamed: 0,Brand,Price
3,Audi A4,35000
2,Ford Focus,27000


In [193]:
cars.where(cars["Brand"] == "Audi A4")

Unnamed: 0,Brand,Price
0,,
1,,
2,,
3,Audi A4,35000.0


In [197]:
cars.query('Brand == "Audi A4" & Price >= 5')

Unnamed: 0,Brand,Price
3,Audi A4,35000


In [199]:
cars["Price"] = cars["Price"].apply(lambda x: str(x)+" $")

In [200]:
cars

Unnamed: 0,Brand,Price
0,Honda Civic,22000 $
1,Toyota Corolla,25000 $
2,Ford Focus,27000 $
3,Audi A4,35000 $


## Group By

In [202]:
len(cars.groupby("Price"))

4

In [203]:
cars["Price"].value_counts()

22000 $    1
27000 $    1
25000 $    1
35000 $    1
Name: Price, dtype: int64

In [206]:
cars.groupby("Price").first()
cars.groupby("Price").last()

Unnamed: 0_level_0,Brand
Price,Unnamed: 1_level_1
22000 $,Honda Civic
25000 $,Toyota Corolla
27000 $,Ford Focus
35000 $,Audi A4


In [219]:
group = cars.groupby("Brand")
group.get_group("Honda Civic")

Unnamed: 0,Brand,Price
0,Honda Civic,22000


In [220]:
group.max()

Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Audi A4,35000
Ford Focus,27000
Honda Civic,22000
Toyota Corolla,25000


In [221]:
group.mean()

Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Audi A4,35000
Ford Focus,27000
Honda Civic,22000
Toyota Corolla,25000


In [222]:
group.size()

Brand
Audi A4           1
Ford Focus        1
Honda Civic       1
Toyota Corolla    1
dtype: int64

In [223]:
group.agg({"Price":"mean","Price":"sum"})

Unnamed: 0_level_0,Price
Brand,Unnamed: 1_level_1
Audi A4,35000
Ford Focus,27000
Honda Civic,22000
Toyota Corolla,25000


In [224]:
df = pd.DataFrame(columns=cars.columns)

In [225]:
df

Unnamed: 0,Brand,Price


In [227]:
pd.concat([cars,cars],ignore_index=True)

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000
4,Honda Civic,22000
5,Toyota Corolla,25000
6,Ford Focus,27000
7,Audi A4,35000


In [234]:
x = pd.concat([cars,cars],keys=[1,2])
x

Unnamed: 0,Unnamed: 1,Brand,Price
1,0,Honda Civic,22000
1,1,Toyota Corolla,25000
1,2,Ford Focus,27000
1,3,Audi A4,35000
2,0,Honda Civic,22000
2,1,Toyota Corolla,25000
2,2,Ford Focus,27000
2,3,Audi A4,35000


In [237]:
x.loc[1]

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000


In [252]:
x.loc[1,2].values

array(['Ford Focus', 27000], dtype=object)

In [253]:
cars.append(x)

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,Toyota Corolla,25000
2,Ford Focus,27000
3,Audi A4,35000
"(1, 0)",Honda Civic,22000
"(1, 1)",Toyota Corolla,25000
"(1, 2)",Ford Focus,27000
"(1, 3)",Audi A4,35000
"(2, 0)",Honda Civic,22000
"(2, 1)",Toyota Corolla,25000


In [258]:
cars.merge(cars,how="inner",on="Brand",suffixes=(" - 2010"," - 2006"))

Unnamed: 0,Brand,Price - 2010,Price - 2006
0,Honda Civic,22000,22000
1,Toyota Corolla,25000,25000
2,Ford Focus,27000,27000
3,Audi A4,35000,35000


In [260]:
cars.join(cars,how="outer",lsuffix=" 2017",rsuffix=" 2018")

Unnamed: 0,Brand 2017,Price 2017,Brand 2018,Price 2018
0,Honda Civic,22000,Honda Civic,22000
1,Toyota Corolla,25000,Toyota Corolla,25000
2,Ford Focus,27000,Ford Focus,27000
3,Audi A4,35000,Audi A4,35000


## Birden Fazla Indeks

In [262]:
cars.set_index(keys=["Brand","Price"],inplace=True)

In [263]:
cars

Brand,Price
Honda Civic,22000
Toyota Corolla,25000
Ford Focus,27000
Audi A4,35000


In [264]:
cars.sort_index()

Brand,Price
Audi A4,35000
Ford Focus,27000
Honda Civic,22000
Toyota Corolla,25000


In [265]:
cars.index.names

FrozenList(['Brand', 'Price'])

In [267]:
cars.values

array([], shape=(4, 0), dtype=float64)

In [270]:
cars.index.set_names(["TAKIM","TAKMA ISIM"],inplace=True)

In [271]:
cars

TAKIM,TAKMA ISIM
Honda Civic,22000
Toyota Corolla,25000
Ford Focus,27000
Audi A4,35000


In [277]:
cars["Brand"].str.lower().str.upper().str.title()

0       Honda Civic
1    Toyota Corolla
2        Ford Focus
3           Audi A4
Name: Brand, dtype: object

In [280]:
cars["Brand"].str.len()
cars["Brand"] = cars["Brand"].apply(lambda x: x.lower())

In [283]:
cars["Brand"]=cars["Brand"].str.replace("honda civic","Honda Civic")
cars

Unnamed: 0,Brand,Price
0,Honda Civic,22000
1,toyota corolla,25000
2,ford focus,27000
3,audi a4,35000


In [285]:
cars[cars["Brand"].str.contains("audi")]

Unnamed: 0,Brand,Price
3,audi a4,35000


In [301]:
cars.columns = cars.columns.str.upper().str.strip()
for columnName in cars.columns:
    if type(cars[columnName]) == str:
        cars[columnName] = cars[columnName].str.strip()
cars

Unnamed: 0,BRAND,PRICE
0,Honda Civic,22000
1,toyota corolla,25000
2,ford focus,27000
3,audi a4,35000


# PIVOT ve MELT 