## Pandas
- DataFrame - a pandas object that stores data in tabular form (rows & columns)

### Installation
```bash
pip install pandas
```

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Creating a data frame
# Draw the marks from a seeded generator so the notebook yields the same
# numbers on every Restart & Run All (unseeded np.random.randint did not).
rng = np.random.default_rng(42)
my_data = {
    # rng.integers(low, high, size): `high` is exclusive, same as np.random.randint
    "MarksA": rng.integers(1, 100, 5),
    "MarksB": rng.integers(5, 80, 5),
    "MarksC": rng.integers(32, 97, 5),
}

In [3]:
# Inspect the dictionary: each key maps to a NumPy array of five marks
my_data

{'MarksA': array([67, 43, 29, 15, 24]),
 'MarksB': array([40, 19, 38,  6,  6]),
 'MarksC': array([86, 90, 69, 85, 56])}

In [4]:
# Build a DataFrame from the dict: keys become column names, arrays become column values
user_data = pd.DataFrame(data=my_data)

In [5]:
# print() emits the plain-text rendering of the frame
print(user_data)

   MarksA  MarksB  MarksC
0      67      40      86
1      43      19      90
2      29      38      69
3      15       6      85
4      24       6      56


In [6]:
# Preview only the first three rows
user_data.head(3)

Unnamed: 0,MarksA,MarksB,MarksC
0,67,40,86
1,43,19,90
2,29,38,69


In [7]:
# .columns holds the column labels as a pandas Index
print(user_data.columns)

Index(['MarksA', 'MarksB', 'MarksC'], dtype='object')


In [8]:
# Persist the frame as CSV in the working directory.
# NOTE: the row index is written too (no index=False here), which is why a
# plain pd.read_csv of this file yields an extra leading 'Unnamed: 0' column.
user_data.to_csv("marks.csv")

In [9]:
# Load the marks back from disk.
# The first CSV column is the row index that to_csv wrote out; index_col=0
# restores it as the index instead of producing a stray 'Unnamed: 0' column
# that would then have to be dropped in a second step.
df = pd.read_csv("marks.csv", index_col=0)
df

Unnamed: 0,MarksA,MarksB,MarksC
0,67,40,86
1,43,19,90
2,29,38,69
3,15,6,85
4,24,6,56


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
df.describe()

Unnamed: 0,MarksA,MarksB,MarksC
count,5.0,5.0,5.0
mean,35.6,21.8,77.2
std,20.268202,16.589153,14.307341
min,15.0,6.0,56.0
25%,24.0,6.0,69.0
50%,29.0,19.0,85.0
75%,43.0,38.0,86.0
max,67.0,40.0,90.0


In [11]:
# Preview only the last two rows
df.tail(2)

Unnamed: 0,MarksA,MarksB,MarksC
3,15,6,85
4,24,6,56


In [12]:
# Accessing a row by integer position: .iloc[2] returns the third row as a Series
df.iloc[2]

MarksA    29
MarksB    38
MarksC    69
Name: 2, dtype: int64

In [13]:
# Row & Col
# Select the single cell at row position 2, column position 1 in one .iloc call.
# The chained form df.iloc[2][1] looks up the integer 1 on a label-indexed
# Series, a positional fallback that pandas has deprecated.
df.iloc[2, 1]

38

In [14]:
# .iloc is purely positional, so translate the column label into its integer
# position first, then index as [row position, column position]
idx = df.columns.get_loc('MarksA')
df.iloc[3, idx]

15

In [15]:
# Resolve several column labels to their positions; a list of positions
# makes .iloc return every listed column for the chosen row
idx = [df.columns.get_loc(label) for label in ('MarksB', 'MarksC')]
df.iloc[3, idx]

MarksB     6
MarksC    85
Name: 3, dtype: int64

In [16]:
# First three rows (positions 0-2) of the columns whose positions are in idx
df.iloc[:3, idx]

Unnamed: 0,MarksB,MarksC
0,40,86
1,19,90
2,38,69


In [17]:
## Sort data frame
# Show the frame in its current order first, for comparison
df

Unnamed: 0,MarksA,MarksB,MarksC
0,67,40,86
1,43,19,90
2,29,38,69
3,15,6,85
4,24,6,56


In [18]:
# Rows ordered by MarksA, lowest first; the result is a new frame and df itself is not reassigned
df.sort_values("MarksA", ascending=True)

Unnamed: 0,MarksA,MarksB,MarksC
3,15,6,85
4,24,6,56
2,29,38,69
1,43,19,90
0,67,40,86


In [19]:
## Get a numpy array from a data frame
# DataFrame.to_numpy() is the accessor the pandas documentation recommends
# over the older .values attribute; it returns the same ndarray here.
data_array = df.to_numpy()
print(type(data_array))
print(data_array.shape)
print(data_array)

<class 'numpy.ndarray'>
(5, 3)
[[67 40 86]
 [43 19 90]
 [29 38 69]
 [15  6 85]
 [24  6 56]]


In [20]:
## Convert a numpy array into a data frame
# Column labels are passed explicitly and the values are stored as int32
new_df = pd.DataFrame(
    data=data_array,
    columns=["Physics", "Chemistry", "Maths"],
    dtype="int32",
)
print(new_df)

   Physics  Chemistry  Maths
0       67         40     86
1       43         19     90
2       29         38     69
3       15          6     85
4       24          6     56


In [21]:
# Save the relabelled frame as CSV; with no index=False the row index is
# written as the first column of the file
new_df.to_csv("PCM.csv")