# Pandas Guide  

### Import Pandas Module

In [2]:
import pandas as pd

### Create a DataFrame manually

In [3]:
'''
NOTE :
DataFrame indices can be numerical or string values
A single column or a single row of a DataFrame is a Series
Most operations do not modify the DataFrame in place; they return a new DataFrame instead
'''

# build a DataFrame from a dict of {column name: list of values}, with explicit string row labels
pd.DataFrame({"column1": ["value11", "value21", "value31"], "column2" : ["value21", "value22", "value23"]}, index=["index1", "index2", "index3"])

Unnamed: 0,column1,column2
index1,value11,value21
index2,value21,value22
index3,value31,value23


### File operations

In [None]:
# NOTE: each read below rebinds df; in practice use only the reader that matches your file
df = pd.read_csv("file name")                  # read dataframe from a CSV file
df = pd.read_excel("file name")                # read dataframe from an Excel sheet
df = pd.read_json("file name")                 # read dataframe from a JSON file

df.to_csv("file name")                         # write dataframe to a CSV file
df.to_excel("file name")                       # write dataframe to an Excel sheet
df.to_json("file name")                        # write dataframe to a JSON file

### Plot DataFrame

In [None]:
# plot column2 (y axis) against column1 (x axis) and set the figure title
df.plot(x="column1", y="column2", title="this is a title")

### Show some rows

In [None]:
df.head()                                     # show first 5 rows of dataframe (default count)
df.head(12)                                   # show first 12 rows; pass any number of rows

df.tail()                                     # show last 5 rows of dataframe (default count)
df.tail(7)                                    # show last 7 rows; pass any number of rows

### Get row or column count

In [None]:
len(df.index)                                  # row count (length of the row index)
len(df.columns)                                # column count (length of the column index)

### Get an element

In [None]:
df.at["index1", "column1"]                     # read a single cell by row label and column name
df.at["index1", "column1"] = "value"           # assign to the same accessor to update a single cell

### Get a row or rows

In [None]:
df.iloc[7]                                     # get a row via its integer position
df.loc["index1"]                               # get a row via index name
df.loc[["index1", "index2"]]                   # get rows via list of index names
df.loc[[True, False, False]]                   # get rows via bool array (one entry per row, True = select)

### Get a column or columns

In [None]:
df["column1"]                                  # get a single column via column name
df[["column1", "column2"]]                     # get several columns via a list of column names

### Update row or column names

In [None]:
df.rename(index={"index1": "new1", "index2": "new2"})        # rename row labels via an {old: new} mapping
df.rename(columns={"column1": "new1", "column2": "new2"})    # rename columns via an {old: new} mapping

### Delete rows or columns

In [None]:
# NOTE: drop() expects labels, not a boolean mask. Passing [True, False, False]
# directly would look the booleans up as labels (KeyError on a string index, or
# silently matching labels 1 and 0 on an integer index). Convert the mask to
# labels first by indexing df.index / df.columns with it; the mask needs one
# entry per row (or per column).
df.drop(index=["index1", "index2"])                   # drop rows via list of index names
df.drop(index=df.index[[True, False, False]])         # drop rows via bool array (True = drop this row)

df.drop(columns=["column1", "column2"])               # drop columns via list of column names
df.drop(columns=df.columns[[True, False, False]])     # drop columns via bool array (True = drop this column)

### Get rows via query

In [None]:
# select the rows for which the expression string holds; column names are quoted with backticks
df.query("`column1` > 24 and `column2` == 'value'")

### Grouping Data

In [None]:
# group rows by each (column1, column2) value combination; no aggregation is applied here
df.groupby(["column1", "column2"])

### Sorting rows

In [None]:
df.sort_values("column1")                        # sort rows by the values of column1
df.sort_values("column1", ascending=False)       # same sort in reverse (descending) order

### Insert a row or rows

In [None]:
# add rows by concatenating df with a second DataFrame (here a one-row frame built from a dict)
pd.concat([df, pd.DataFrame({"column1": ["value1"], "column2": ["value2"]})])

### Insert a column or columns

In [None]:
df_1["new column"] = df_2["column1"]             # create a column as a copy of a column from the same or another dataframe
df["new column"] = df["column1"] + df["column2"] # or create a column by combining other columns with arithmetic operations

### NaN Operations

In [None]:
# NOTE: axis must be passed by keyword (dropna is keyword-only in pandas 2.x),
# and the valid axis names are "index" and "columns".
df.dropna(axis="index")                           # if a row has any NaN value, then drop it
df.dropna(axis=0)                                 # same as axis="index"

df.dropna(axis="columns")                         # if a column has any NaN value, then drop it
df.dropna(axis=1)                                 # same as axis="columns"

df.dropna(subset=["column1", "column2"])          # only check these columns for NaN when dropping rows, not all of them

df["column1"].notna()                             # return a bool array: True where the value is not NaN

### Membership Operation (isin)

In [None]:
df["column1"].isin(["value1", "value2"])          # return a bool array: True where the column value is in the list

### Change data type of column

In [None]:
# convert the values of column1 to int; the result is not assigned back to df here
df["column1"].astype("int")

### Convert a column to numpy array

In [None]:
# get the values of column1 as a numpy array
df["column1"].to_numpy()

### Reset Index

In [None]:
df.reset_index()                                  # replace the index with values that follow the row order
df.reset_index(drop=True)                         # same, if you don't want to keep the old index values as a new column