# DataFrame

In [2]:
import pandas as pd
import numpy as np

## Creation of DataFrame
There are a number of ways to create a DataFrame. 
Some of them are listed in this section.

### A) Creation of an empty DataFrame 
An empty DataFrame can be created as follows:

In [2]:
dFrameEmt = pd.DataFrame()
dFrameEmt

### B) Creation of DataFrame from NumPy ndarrays


In [3]:
array1 = np.array([10, 20, 30])
array2 = np.array([100, 200, 300])
array3 = np.array([-10, -20, -30, -40])

In [4]:
dFrame1 = pd.DataFrame(array1)
dFrame1

Unnamed: 0,0
0,10
1,20
2,30


In [5]:
dFrame2 = pd.DataFrame([array1, array2, array3], columns = ["A", "B", "C", "D"])
dFrame2

Unnamed: 0,A,B,C,D
0,10,20,30,
1,100,200,300,
2,-10,-20,-30,-40.0


### C) Creation of DataFrame from List of Dictionaries

In [6]:
listDict = [ {"a":10, "b":20}, {"a":5, "b":10, "c":20} ]
listDict

[{'a': 10, 'b': 20}, {'a': 5, 'b': 10, 'c': 20}]

In [7]:
dFrameListDict = pd.DataFrame(listDict)
dFrameListDict

Unnamed: 0,a,b,c
0,10,20,
1,5,10,20.0


### D) Creation of DataFrame from Dictionary of Lists

In [8]:
dictForest = {
    "State": ["Assam", "Delhi", "Kerla"],
    "Green Area": [78438, 1483, 38852],
    "VDF": [2797, 6.72, 1663]
}

In [9]:
dFrameForest = pd.DataFrame(dictForest)
dFrameForest

Unnamed: 0,State,Green Area,VDF
0,Assam,78438,2797.0
1,Delhi,1483,6.72
2,Kerla,38852,1663.0


In [10]:
dFrameForest1 = pd.DataFrame(dictForest, columns = ["State", "VDF", "GArea"])
dFrameForest1

Unnamed: 0,State,VDF,GArea
0,Assam,2797.0,
1,Delhi,6.72,
2,Kerla,1663.0,


### E) Creation of DataFrame from Series 

In [11]:
seriesA = pd.Series([1, 2, 3, 4, 5], index = ["a", "b", "c", "d", "e"])
seriesB = pd.Series([1000, 2000, -1000, -5000, 100], index = ["a", "b", "c", "d", "e"])
seriesC = pd.Series([10, 20, -10, -50, 100], index = ["a", "b", "c", "d", "e"])

print(seriesA)
print("\n\n")
print(seriesB)
print("\n\n")
print(seriesC)

a    1
b    2
c    3
d    4
e    5
dtype: int64



a    1000
b    2000
c   -1000
d   -5000
e     100
dtype: int64



a     10
b     20
c    -10
d    -50
e    100
dtype: int64


In [12]:
dFrame1 = pd.DataFrame(seriesA)
dFrame1

Unnamed: 0,0
a,1
b,2
c,3
d,4
e,5


In [13]:
dFrame2 = pd.DataFrame([seriesA, seriesB])
dFrame2

Unnamed: 0,a,b,c,d,e
0,1,2,3,4,5
1,1000,2000,-1000,-5000,100


In [14]:
dFrame3 = pd.DataFrame([seriesA, seriesC])
dFrame3

Unnamed: 0,a,b,c,d,e
0,1,2,3,4,5
1,10,20,-10,-50,100


### F) Creation of DataFrame from Dictionary of Series

In [41]:
# Example 1:

ResultSheet = {
    "Arnab": pd.Series([90, 91, 97], index = ["Maths", "Science", "Hindi"]),
    "Ramit": pd.Series([92, 81, 96], index = ["Maths", "Science", "Hindi"]),
    "Samridhi": pd.Series([89, 91, 88], index = ["Maths", "Science", "Hindi"]),
    "Riya": pd.Series([81, 71, 67], index = ["Maths", "Science", "Hindi"]),
    "Malika": pd.Series([94, 95, 99], index = ["Maths", "Science", "Hindi"])
}

In [42]:
ResultDF = pd.DataFrame(ResultSheet)
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [17]:
type(ResultDF.Arnab)

pandas.core.series.Series

In [18]:
# Example 2:

dictForUnion = {
    "Series1" : pd.Series([1, 2, 3, 4, 5], index = ["a", "b", "c", "d", "e"]),
    "Series2" : pd.Series([10, 20, -10, -50, 100], index = ["z", "y", "a", "c", "e"]),
    "Series3" : pd.Series([10, 20, -10, -50, 100], index = ["z", "y", "a", "c", "e"])
}

dictForUnion

{'Series1': a    1
 b    2
 c    3
 d    4
 e    5
 dtype: int64,
 'Series2': z     10
 y     20
 a    -10
 c    -50
 e    100
 dtype: int64,
 'Series3': z     10
 y     20
 a    -10
 c    -50
 e    100
 dtype: int64}

In [19]:
dFrameUnion = pd.DataFrame(dictForUnion)
dFrameUnion

Unnamed: 0,Series1,Series2,Series3
a,1.0,-10.0,-10.0
b,2.0,,
c,3.0,-50.0,-50.0
d,4.0,,
e,5.0,100.0,100.0
y,,20.0,20.0
z,,10.0,10.0


## Operations on Rows and Columns in DataFrame 

### A) Adding a New Column to a DataFrame

In [20]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [21]:
ResultDF["Preeti"] = [89, 78, 76]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,90,92,89,81,94,89
Science,91,81,91,71,95,78
Hindi,97,96,88,67,99,76


In [22]:
ResultDF["Ramit"] = [99, 98, 78]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,90,99,89,81,94,89
Science,91,98,91,71,95,78
Hindi,97,78,88,67,99,76


In [23]:
ResultDF["Arnab"] = 90
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76


### B) Adding a New Row to a DataFrame

In [24]:
ResultDF.loc["English"] = [85, 86, 83, 80, 90, 89]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76
English,85,86,83,80,90,89


In [25]:
ResultDF.loc["English"] = [95, 86, 95, 80, 95, 99]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76
English,95,86,95,80,95,99


In [26]:
ResultDF.loc["Maths"] = 0
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,0,0,0,0,0,0
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76
English,95,86,95,80,95,99


In [27]:
ResultDF[: ] = 0
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika,Preeti
Maths,0,0,0,0,0,0
Science,0,0,0,0,0,0
Hindi,0,0,0,0,0,0
English,0,0,0,0,0,0


### C) Deleting Rows or Columns from a DataFrame

<br>We can use the DataFrame.drop() method to delete rows and columns from a DataFrame. 
<br>We need to specify the names of the labels to be dropped and the axis from which they need to be dropped. </br>
<br>To delete a row, the parameter axis is assigned the value 0 and </br>
<br>for deleting a column,the parameter axis is assigned the value 1. </br>

<br>Consider the following DataFrame:</br>

In [37]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [38]:
ResultDF = ResultDF.drop("Science", axis = 0)
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Hindi,97,96,88,67,99


In [39]:
ResultDF = ResultDF.drop(["Samridhi", "Ramit", "Riya"], axis = 1)
ResultDF

Unnamed: 0,Arnab,Malika
Maths,90,94
Hindi,97,99


### D) Renaming Row Labels of a DataFrame

In [43]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [32]:
ResultDF = ResultDF.rename({"Maths":"Sub1",
                            "Science":"Sub2",
                            "Hindi":"Sub3",
                            "English":"Sub4"})

ResultDF

Unnamed: 0,Arnab,Malika,Preeti
Sub1,0,0,0
Sub3,0,0,0
Sub4,0,0,0


### E) Renaming Column Labels of a DataFrame

In [33]:
ResultDF = ResultDF.rename({"Arnab": "Student1",
                            "Ramit": "Student2",
                            "Samridhi": "Student3",
                            "Malika": "Student4"}, axis="columns")

ResultDF

Unnamed: 0,Student1,Student4,Preeti
Sub1,0,0,0
Sub3,0,0,0
Sub4,0,0,0


In [36]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


## Accessing DataFrames Element through Indexing

Data elements in a DataFrame can be accessed using 
indexing

.There are two ways of indexing Dataframes : 
Label based indexing and Boolean Indexing.

### A) Label Based Indexing

In [44]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [62]:
ResultDF.loc["Science"]

Arnab       91
Ramit       81
Samridhi    91
Riya        71
Malika      95
Name: Science, dtype: int64

In [49]:
dFrame10Multiples = pd.DataFrame([10, 20, 30, 40, 50])
dFrame10Multiples

Unnamed: 0,0
0,10
1,20
2,30
3,40
4,50


In [50]:
dFrame10Multiples.loc[2]

0    30
Name: 2, dtype: int64

In [51]:
ResultDF.loc[:, "Arnab"]

Maths      90
Science    91
Hindi      97
Name: Arnab, dtype: int64

In [52]:
ResultDF.loc[["Science", "Hindi"]]

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Science,91,81,91,71,95
Hindi,97,96,88,67,99


### B) Boolean Indexing

In [63]:
ResultDF.loc[["Maths","Science"]] > 90

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,False,True,False,False,True
Science,True,False,True,False,True


In [64]:
ResultDF.loc[:, ["Arnab", "Riya"]] > 90

Unnamed: 0,Arnab,Riya
Maths,False,False
Science,True,False
Hindi,True,False


## Accessing DataFrames Element through Slicing

In [57]:
ResultDF.loc["Maths": "Science"]

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95


In [58]:
ResultDF.loc["Maths": "Science", "Arnab"]

Maths      90
Science    91
Name: Arnab, dtype: int64

In [60]:
ResultDF.loc["Maths": "Science", "Arnab": "Samridhi"]

Unnamed: 0,Arnab,Ramit,Samridhi
Maths,90,92,89
Science,91,81,91


In [66]:
ResultDF.loc["Maths": "Science", ["Arnab", "Samridhi"]]

Unnamed: 0,Arnab,Samridhi
Maths,90,89
Science,91,91


### Filtering Rows in DataFrames

In [67]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [68]:
            # <------Rows----->
ResultDF.loc[[True, False, True]]
            # Maths Science Hindi

# Where their is True that Row will be Printing

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Hindi,97,96,88,67,99


## Joining, Merging and Concatenation of DataFrames

### A) Joining

In [74]:
dFrame1 = pd.DataFrame([[1, 2, 3], [4, 5], [6]] , columns = ["C1", "C2", "C3"], index = ["R1", "R2", "R3"])
dFrame1

Unnamed: 0,C1,C2,C3
R1,1,2.0,3.0
R2,4,5.0,
R3,6,,


In [75]:
dFrame2 = pd.DataFrame([[10, 20], [30], [40, 50]], columns = ["C2", "C5"], index = ["R4", "R2", "R5"])
dFrame2

Unnamed: 0,C2,C5
R4,10,20.0
R2,30,
R5,40,50.0


## Attributes of DataFrames

In [78]:
ForestArea = {

    "Assam" : pd.Series([78438, 2797, 10192, 15116], index = ["GeoArea", "VeryDense", "ModeratelyDense", "OpenForest"]),
    "Kerla" : pd.Series([38852, 1663, 9407, 9251], index = ["GeoArea", "VeryDense", "ModeratelyDense", "OpenForest"]),
    "Delhi" : pd.Series([1483, 6.72, 56.24, 129.45], index = ["GeoArea", "VeryDense", "ModeratelyDense", "OpenForest"]),
}

ForestArea

{'Assam': GeoArea            78438
 VeryDense           2797
 ModeratelyDense    10192
 OpenForest         15116
 dtype: int64,
 'Kerla': GeoArea            38852
 VeryDense           1663
 ModeratelyDense     9407
 OpenForest          9251
 dtype: int64,
 'Delhi': GeoArea            1483.00
 VeryDense             6.72
 ModeratelyDense      56.24
 OpenForest          129.45
 dtype: float64}

In [82]:
ForestAreaDF = pd.DataFrame(ForestArea)
ForestAreaDF

Unnamed: 0,Assam,Kerla,Delhi
GeoArea,78438,38852,1483.0
VeryDense,2797,1663,6.72
ModeratelyDense,10192,9407,56.24
OpenForest,15116,9251,129.45


In [83]:
ForestAreaDF.index

Index(['GeoArea', 'VeryDense', 'ModeratelyDense', 'OpenForest'], dtype='object')

In [84]:
ForestAreaDF.columns

Index(['Assam', 'Kerla', 'Delhi'], dtype='object')

In [85]:
ForestAreaDF.dtypes

Assam      int64
Kerla      int64
Delhi    float64
dtype: object

In [86]:
ForestAreaDF.values

array([[7.8438e+04, 3.8852e+04, 1.4830e+03],
       [2.7970e+03, 1.6630e+03, 6.7200e+00],
       [1.0192e+04, 9.4070e+03, 5.6240e+01],
       [1.5116e+04, 9.2510e+03, 1.2945e+02]])

In [88]:
ForestAreaDF.shape

(4, 3)

In [89]:
ForestAreaDF.size

12

In [90]:
ForestAreaDF.T

Unnamed: 0,GeoArea,VeryDense,ModeratelyDense,OpenForest
Assam,78438.0,2797.0,10192.0,15116.0
Kerla,38852.0,1663.0,9407.0,9251.0
Delhi,1483.0,6.72,56.24,129.45


In [91]:
ForestAreaDF.head(2)

Unnamed: 0,Assam,Kerla,Delhi
GeoArea,78438,38852,1483.0
VeryDense,2797,1663,6.72


In [92]:
ForestAreaDF.tail(2)

Unnamed: 0,Assam,Kerla,Delhi
ModeratelyDense,10192,9407,56.24
OpenForest,15116,9251,129.45


In [95]:
ForestAreaDF.empty

False

## Importing and Exporting Data between CSV Files and DataFrames

In [107]:
marks1 = pd.read_csv("Result-Data.csv", sep = ",", header = 0)
marks1

Unnamed: 0,RollNo,Name,Eco,Maths
0,1,Arnab,18,57
1,2,Kritika,23,45
2,3,Divyam,51,37
3,4,Vivaan,40,60
4,5,Aaroosh,18,27


In [109]:
marks2 = pd.read_csv("Result-Data.csv", sep = ",", names = ["RNo", "StudentName", "Sub1", "Sub2"])
marks2

Unnamed: 0,RNo,StudentName,Sub1,Sub2
0,RollNo,Name,Eco,Maths
1,1,Arnab,18,57
2,2,Kritika,23,45
3,3,Divyam,51,37
4,4,Vivaan,40,60
5,5,Aaroosh,18,27


In [110]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Malika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [116]:
ResultDF.to_csv(path_or_buf = "ResultDF.csv", sep= ",")

In [4]:
Ration = pd.read_csv(r"C:\Users\dhana\Music\Anil_Ration_Grocery_List.csv", sep = ",")
Ration

Unnamed: 0,Tapioca Pearl / Sago,साबदुाणा
0,Split Pigeon Peas,तरू डाळ
1,Groundnut,शेंगदाणा
2,Flattened Rice,पोहे
3,Semolina,रवा
4,Brown Chickpeas,देशी चना
5,Mustard Oil,मोहरीचेतले
6,Sunflower Refined,सर्यू फर्य ूल रि फाइन्ड
7,Soyabean Refined,सोयाबीन रि फाइन्ड
8,Kanda Masala,कांदा मसाला
9,Haldi Powder,हळदी पावडर
