# **Pandas**

In [1]:
import pandas as pd

# Column-oriented input: every key becomes a column name and every list
# supplies that column's values, one entry per row.
mydataset = {
    'Batch': ['C1', 'C2', 'C3', 'C4'],
    'Strength': [21, 19, 21, 23],
}

# Build the table; rows receive the default integer index 0..3.
df = pd.DataFrame(data=mydataset)

print(df)

  Batch  Strength
0    C1        21
1    C2        19
2    C3        21
3    C4        23


In [2]:
a = [10, 70, 20]

# Wrap the plain list in a one-dimensional Series. No index is passed, so
# pandas labels the values with their positions 0, 1, 2.
# (The stray trailing backslash that used to follow this call was removed:
# it only parsed because the next line happened to be blank.)
myvar = pd.Series(a)

print(myvar)

0    10
1    70
2    20
dtype: int64


# Labels:
If nothing else is specified, the values are labeled with their index number. First value has index 0, second value has index 1 etc.

This label can be used to access a specified value.

In [3]:

# Fetch the value stored under label 0 of the Series built in the previous
# cell, then display it.
first_value = myvar[0]
print(first_value)

10


# Create Labels
With the index argument, you can name your own labels.

In [6]:
import pandas as pd

# Values for the Series and the custom labels that replace the default
# 0..n-1 index (one label per value, in the same order).
a = [1, 7, 2]
labels = ["x", "y", "z"]

myvar = pd.Series(data=a, index=labels)

print(myvar)

x    1
y    7
z    2
dtype: int64


# Key/Value Objects as Series
You can also use a key/value object, like a dictionary, when creating a Series.

In [12]:
import pandas as pd

# Subject -> score mapping; the dictionary keys become the Series labels.
result = {
    "SIC": 55,
    "AM": 60,
    "EDS": 66,
}

sr = pd.Series(data=result)

print(sr)

SIC    55
AM     60
EDS    66
dtype: int64


In [13]:
import pandas as pd

# Subject -> score mapping used as the data source.
result = {
    "SIC": 55,
    "AM": 60,
    "EDS": 66,
}

# Passing index alongside a dict keeps only the listed keys, in this order;
# "AM" is therefore left out of the resulting Series.
wanted = ['SIC', 'EDS']
sr = pd.Series(data=result, index=wanted)

print(sr)

SIC    55
EDS    66
dtype: int64


# DataFrames
Data sets in Pandas are usually multi-dimensional tables, called DataFrames.

Series is like a column, a DataFrame is the whole table.

# Pandas DataFrames
A Pandas DataFrame is a 2-dimensional data structure, like a 2-dimensional array, or a table with rows and columns.


In [16]:
import pandas as pd

# One list per column; entries at the same position belong to the same row.
data = {
    "Course": ['SIC', 'DT', 'EDS', 'ENG'],
    "Credits": [4, 3, 4, 2],
}

# Assemble the two columns into a table with the default 0..3 row index.
df = pd.DataFrame(data=data)

print(df)

  Course  Credits
0    SIC        4
1     DT        3
2    EDS        4
3    ENG        2


Pandas uses the `loc` attribute to return one or more specified rows.

In [17]:
# A single label passed to loc selects one row, returned as a Series whose
# entries are labeled by column name.
first_row = df.loc[0]
print(first_row)

Course     SIC
Credits      4
Name: 0, dtype: object


In [18]:
# A list of labels passed to loc selects several rows and keeps the result
# as a DataFrame.
row_labels = [0, 1]
print(df.loc[row_labels])

  Course  Credits
0    SIC        4
1     DT        3


# Load Files Into a DataFrame
If your data sets are stored in a file, Pandas can load them into a DataFrame.

Tip: use to_string() to print the entire DataFrame.

In [20]:
import pandas as pd

# Load the CSV file at the given path into a DataFrame and print it.
marks_csv = '/content/testmarks2.csv'
df = pd.read_csv(marks_csv)

print(df)

   RollNo    SIC     AM     FE     IC
0     801  28.48  34.18  30.56  22.23
1     802  28.10  33.72  30.68  22.82
2     803  26.16  31.39  28.20  22.53
3     804  26.16  31.39  28.78  20.93
4     805  26.10  31.32  28.22  20.82
5     806  25.45  30.54  27.73  21.05
6     807  26.16  31.39  28.01  20.51
7     808  27.44  32.93  28.83  22.08
8     809  28.63  34.35  31.03  22.68
9     810  30.35  36.42  31.38  23.10


In [21]:
import pandas as pd

# Load the results CSV at the given path into a DataFrame and print it.
results_csv = '/content/result2.csv'
df = pd.read_csv(results_csv)

print(df)

      Name  Maths  English  Physics  Chemistry  Biology
0     Amit     75       45       60         65       62
1    Sumit     63       56       58         62       60
2    Anita     80       58       74         58       65
3   Sujata     42       53       57         56       45
4     Ajit     58       69       65         58       64
5    Sujit     57       68       90         97       67
6      Ana     78       75       87         89       65
7     Tina     85       58       89         56       63
8     Mina     57       45       54         66       64
9     Lata     54       74       23         77       62
10   smita     89       54       54         23       60
11  asmita     54       55       63         74       65
12   suraj     63       66       78         56       33
13   netra     23       87       44         87       87


# Read JSON
Big data sets are often stored or extracted as JSON.

JSON is plain text, but has the format of an object, and is well known in the world of programming, including Pandas.

In this example we will be using a JSON file called 'sampleJSON.js'.

The cell below opens it from `/content/sampleJSON.js`.

In [24]:
import pandas as pd

# Parse the JSON file at the given path into a DataFrame.
json_path = '/content/sampleJSON.js'
df = pd.read_json(json_path)

# to_string() renders the whole frame as text, so printing it shows every
# row instead of an abbreviated view.
full_table = df.to_string()
print(full_table)


     Duration  Pulse  Maxpulse  Calories
0          60    110       130     409.1
1          60    117       145     479.0
2          60    103       135     340.0
3          45    109       175     282.4
4          45    117       148     406.0
5          60    102       127     300.5
6          60    110       136     374.0
7          45    104       134     253.3
8          30    109       133     195.1
9          60     98       124     269.0
10         60    103       147     329.3
11         60    100       120     250.7
12         60    106       128     345.3
13         60    104       132     379.3
14         60     98       123     275.0
15         60     98       120     215.2
16         60    100       120     300.0
17         45     90       112       NaN
18         60    103       123     323.0
19         45     97       125     243.0
20         60    108       131     364.2
21         45    100       119     282.0
22         60    130       101     300.0
23         45   

# Viewing the Data
One of the most used methods for getting a quick overview of a DataFrame is the head() method.

The head() method returns the headers and a specified number of rows, starting from the top.

In [25]:
import pandas as pd

# Reload the results CSV, then show only the first 10 rows with the header.
results_csv = '/content/result2.csv'
df = pd.read_csv(results_csv)

top_rows = df.head(10)
print(top_rows)

     Name  Maths  English  Physics  Chemistry  Biology
0    Amit     75       45       60         65       62
1   Sumit     63       56       58         62       60
2   Anita     80       58       74         58       65
3  Sujata     42       53       57         56       45
4    Ajit     58       69       65         58       64
5   Sujit     57       68       90         97       67
6     Ana     78       75       87         89       65
7    Tina     85       58       89         56       63
8    Mina     57       45       54         66       64
9    Lata     54       74       23         77       62


Note: if the number of rows is not specified, the head() method will return the top 5 rows.

In [26]:
import pandas as pd

# Reload the results CSV, then call head() without a row count so that the
# default number of leading rows is shown.
results_csv = '/content/result2.csv'
df = pd.read_csv(results_csv)

top_rows = df.head()
print(top_rows)

     Name  Maths  English  Physics  Chemistry  Biology
0    Amit     75       45       60         65       62
1   Sumit     63       56       58         62       60
2   Anita     80       58       74         58       65
3  Sujata     42       53       57         56       45
4    Ajit     58       69       65         58       64


There is also a tail() method for viewing the last rows of the DataFrame.

The tail() method returns the headers and a specified number of rows, starting from the bottom.

In [27]:
import pandas as pd

# Reload the results CSV, then call tail() without a row count to show the
# last rows of the table together with the header.
results_csv = '/content/result2.csv'
df = pd.read_csv(results_csv)

bottom_rows = df.tail()
print(bottom_rows)

      Name  Maths  English  Physics  Chemistry  Biology
9     Lata     54       74       23         77       62
10   smita     89       54       54         23       60
11  asmita     54       55       63         74       65
12   suraj     63       66       78         56       33
13   netra     23       87       44         87       87
