# Pandas Guide  

### To import Pandas Module

In [4]:
import pandas as pd

### Creating a DataFrame manually

In [16]:
# NOTE:
# - DataFrame index labels can be numbers or strings (the example below mixes both).
# - A single column or a single row of a DataFrame is a Series.
# - Most operations do not modify the DataFrame in place; they return a new
#   DataFrame instead, so assign the result if you want to keep it.

# build a DataFrame from a dict of columns, with explicit row labels
pd.DataFrame({"city": ["ankara", "istanbul", "izmir"], "temp": [28, 24, 30]}, index=[0, 1, "foo"])

Unnamed: 0,city,temp
0,ankara,28
1,istanbul,24
foo,izmir,30


### File Operations

In [None]:
# "file name" is a placeholder path; each read below rebinds df
df = pd.read_csv("file name")                  # read a DataFrame from a CSV file
df = pd.read_excel("file name")                # read a DataFrame from an Excel sheet
df = pd.read_json("file name")                 # read a DataFrame from a JSON file

df.to_csv("file name")                         # write the DataFrame to a CSV file
df.to_excel("file name")                       # write the DataFrame to an Excel sheet
df.to_json("file name")                        # write the DataFrame to a JSON file

### Get Row or Column Count

In [None]:
# df.shape is a (rows, columns) tuple
df.shape[0]                                    # number of rows
df.shape[1]                                    # number of columns

### Get an element

In [None]:
df.at["index1", "column1"]                     # read a single cell by row label and column label
df.at["index1", "column1"] = "value"           # overwrite that single cell in place

### Get a row or rows

In [None]:
df.iloc[7]                                     # get a row via its integer position
df.loc["index1"]                               # get a row via its index label
df.loc[["index1", "index2"]]                   # get rows via a list of index labels
df.loc[[True, False, False]]                   # get rows via a bool array (True = keep the row)

### Get a column or columns

In [None]:
df["column1"]                                  # get a single column via its name
df[["column1", "column2"]]                     # get several columns via a list of names

### Update row or column names

In [None]:
# each dict maps an old label to its new label
df.rename(index={"index1": "new1", "index2": "new2"})        # rename row labels
df.rename(columns={"column1": "new1", "column2": "new2"})    # rename column labels

### Delete rows or columns

In [None]:
# drop() matches *labels*, so a bool array cannot be passed to it directly:
# first turn the mask into the labels it selects by indexing df.index /
# df.columns with it (the mask length must equal the number of rows / columns)
df.drop(index=["index1", "index2"])                  # drop rows via list of labels
df.drop(index=df.index[[True, False, False]])        # drop rows via bool array (True = drop)

df.drop(columns=["column1", "column2"])              # drop columns via list of labels
df.drop(columns=df.columns[[True, False, False]])    # drop columns via bool array (True = drop)

### Delete rows via filter

In [None]:
# DataFrame.filter() selects by *label* (never by cell value) and requires one
# of items=, like= or regex=; calling it with no argument raises TypeError
df.filter(like="index", axis=0)                  # keep only rows whose label contains "index"
df[~(df["column1"] > 24)]                        # delete rows whose values match a condition

### Get rows via query

In [None]:
# keep the rows where column1 is greater than 24 and column2 equals 'value';
# column names are written between backticks inside the query string
df.query("`column1` > 24 and `column2` == 'value'")

### Grouping Data

In [None]:
# group together the rows that share the same (column1, column2) values;
# NOTE(review): no aggregation is applied here, so this cell only builds the grouping
df.groupby(["column1", "column2"])

### Sorting rows

In [None]:
df.sort_values(by="column1")                     # order rows by the values of one column
df.sort_values(by="column1", ascending=False)    # same key, descending order

### Insert a row or rows

In [None]:
# a new row has to be wrapped in its own DataFrame before it can be added
new_df = pd.DataFrame([{"city": "Trabzon", "temp": 24}])
pd.concat([df, new_df])

### Insert a column or columns

In [None]:
# `column` is a placeholder here; it is expected to be a Series
df["new column"] = column
# or copy an existing column
df["new column"] = df["column1"]
# or build the new column by combining other columns
df["new column"] = df["column1"] + df["column2"]

### NaN Operations

In [None]:
# axis has to be passed by keyword (it is keyword-only since pandas 2.0) and
# the accepted axis names are "index" and "columns"
df.dropna(axis="index")                           # if a row has any NaN value, then drop it
df.dropna(axis=0)                                 # same as axis="index"

df.dropna(axis="columns")                         # if a column has any NaN value, then drop it
df.dropna(axis=1)                                 # same as axis="columns"

df.dropna(subset=["column1", "column2"])          # only look for NaN in these columns, not all of them

df["column1"].notna()                             # bool Series: True where the value is not NaN

### Membership check (isin)

In [None]:
# element-wise membership test: returns a bool array that is True where
# the cell equals any of the values in the list
df["column1"].isin(["value1", "value2"])

### To change data type of column

In [None]:
# astype() returns the converted column; assign it back
# (df["column1"] = ...) to keep the new dtype
df["column1"].astype("int")

### To convert a column to numpy array

In [None]:
df["column1"].to_numpy()                          # the column's values as a NumPy array