In [2]:
import numpy as np
import pandas as pd

In [3]:
user_data = {
    "MarksA": np.random.randint(1, 100, 5),
    "MarksB": np.random.randint(50, 100, 5),
    "MarksC": np.random.randint(1, 100, 5)
}

In [4]:
user_data

{'MarksA': array([57, 54, 44,  3, 15]),
 'MarksB': array([94, 65, 96, 80, 90]),
 'MarksC': array([10, 50, 80, 74, 79])}

In [5]:
# convert dictionary into pandas dataframe
df = pd.DataFrame(user_data)
df.head()

Unnamed: 0,MarksA,MarksB,MarksC
0,57,94,10
1,54,65,50
2,44,96,80
3,3,80,74
4,15,90,79


In [6]:
# to create a csv file from a given dataframe
df.to_csv('marks.csv')

In [7]:
# read from csv file
my_data = pd.read_csv('marks.csv')          # my_data is a dataframe
my_data

Unnamed: 0.1,Unnamed: 0,MarksA,MarksB,MarksC
0,0,57,94,10
1,1,54,65,50
2,2,44,96,80
3,3,3,80,74
4,4,15,90,79


In [8]:
# to remove first extra column
my_data = my_data.drop(columns = ['Unnamed: 0'])
my_data

Unnamed: 0,MarksA,MarksB,MarksC
0,57,94,10
1,54,65,50
2,44,96,80
3,3,80,74
4,15,90,79


In [9]:
my_data.describe()

Unnamed: 0,MarksA,MarksB,MarksC
count,5.0,5.0,5.0
mean,34.6,85.0,58.6
std,24.234273,12.767145,29.779187
min,3.0,65.0,10.0
25%,15.0,80.0,50.0
50%,44.0,90.0,74.0
75%,54.0,94.0,79.0
max,57.0,96.0,80.0


In [10]:
# to view last 3 rows
my_data.tail(n = 3)

Unnamed: 0,MarksA,MarksB,MarksC
2,44,96,80
3,3,80,74
4,15,90,79


In [11]:
# to access a row
my_data.iloc[3]

MarksA     3
MarksB    80
MarksC    74
Name: 3, dtype: int64

In [12]:
# row and column
my_data.iloc[3, 1]

80

In [13]:
my_data.iloc[3][1]

80

In [14]:
# if we dont know the column name
idx = my_data.columns.get_loc('MarksB')
my_data.iloc[3, idx]

80

In [15]:
# if we want more than two columns
idx = [ my_data.columns.get_loc('MarksB'), my_data.columns.get_loc('MarksB') ]
my_data.iloc[3, idx]

MarksB    80
MarksB    80
Name: 3, dtype: int64

In [16]:
my_data.iloc[:3, [1, 2]]

Unnamed: 0,MarksB,MarksC
0,94,10
1,65,50
2,96,80


In [17]:
# sorting a data frame
my_data.sort_values(by = ['MarksA', 'MarksB'], ascending = True)
# first sort according to MarksA, if they are same then sort according to MarksB

Unnamed: 0,MarksA,MarksB,MarksC
3,3,80,74
4,15,90,79
2,44,96,80
1,54,65,50
0,57,94,10


In [18]:
data_array = my_data.values

In [19]:
print(data_array)

[[57 94 10]
 [54 65 50]
 [44 96 80]
 [ 3 80 74]
 [15 90 79]]


In [20]:
print(type(data_array))

<class 'numpy.ndarray'>


In [21]:
print(data_array.shape)

(5, 3)


In [22]:
# converting numpy arrays back to data frames
new_df = pd.DataFrame(data_array, dtype = 'int32', columns = ['Maths', 'Physics', 'Chemistry'])

In [23]:
new_df

Unnamed: 0,Maths,Physics,Chemistry
0,57,94,10
1,54,65,50
2,44,96,80
3,3,80,74
4,15,90,79


In [24]:
new_df.to_csv('PCM.csv', index = False)          # extra unnamed column is not saved using index = False

In [25]:
pcm = pd.read_csv('PCM.csv')
pcm

Unnamed: 0,Maths,Physics,Chemistry
0,57,94,10
1,54,65,50
2,44,96,80
3,3,80,74
4,15,90,79
