
# DataFrame: two-dimensional data structure

Two-dimensional, size-mutable, potentially heterogeneous tabular data.

Data structure also contains labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like container for Series objects. The primary pandas data structure.

# Creating dataframe using python list

In [1]:
import pandas as pd

In [2]:
# NOTE: braces make this a set, not a list: the duplicate 1 collapses (hence
# the 6 rows in the output below) and element order is not guaranteed.
data = {1,1,2,3,5,8,13}

In [3]:
df = pd.DataFrame(data)

In [4]:
df

Unnamed: 0,0
0,1
1,2
2,3
3,5
4,8
5,13


In [5]:
type(df)

pandas.core.frame.DataFrame

In [6]:
df = pd.DataFrame(data, columns = ['fib'])

In [7]:
df

Unnamed: 0,fib
0,1
1,2
2,3
3,5
4,8
5,13


In [8]:
type(df['fib'])

pandas.core.series.Series

# Creating dataframe using list of lists

In [9]:
data = [['ram', 10],['shyam', 11], ['rita', 22]]

In [10]:
df = pd.DataFrame(data)

In [11]:
df

Unnamed: 0,0,1
0,ram,10
1,shyam,11
2,rita,22


In [12]:
df = pd.DataFrame(data, columns = ['Name', 'no. of choice'])

In [13]:
df

Unnamed: 0,Name,no. of choice
0,ram,10
1,shyam,11
2,rita,22


In [14]:
df['Name']

0      ram
1    shyam
2     rita
Name: Name, dtype: object

In [15]:
type(df['Name'])

pandas.core.series.Series

In [16]:
df['no. of choice']

0    10
1    11
2    22
Name: no. of choice, dtype: int64

In [17]:
df['no. of choice'].values

array([10, 11, 22])

In [18]:
df

Unnamed: 0,Name,no. of choice
0,ram,10
1,shyam,11
2,rita,22


In [19]:
df['Name'].values

array(['ram', 'shyam', 'rita'], dtype=object)

In [20]:
df['Name'].values.tolist()

['ram', 'shyam', 'rita']

In [21]:
df.values

array([['ram', 10],
       ['shyam', 11],
       ['rita', 22]], dtype=object)

In [22]:
df.values[:, 0]

array(['ram', 'shyam', 'rita'], dtype=object)

# Accessing column values of pandas dataframe

In [23]:
df = pd.DataFrame(data, columns = ['Name', 'no. of chocolate'])

In [24]:
df

Unnamed: 0,Name,no. of chocolate
0,ram,10
1,shyam,11
2,rita,22


In [25]:
df['Name']

0      ram
1    shyam
2     rita
Name: Name, dtype: object

In [26]:
df.Name

0      ram
1    shyam
2     rita
Name: Name, dtype: object

In [27]:
df['no. of chocolate']

0    10
1    11
2    22
Name: no. of chocolate, dtype: int64

In [28]:
# Attribute-style access only works when the column label is a valid Python
# identifier; this label contains a dot and spaces, so use
# df['no. of chocolate'] instead.
df.no.of chocolate # won't work

SyntaxError: ignored

# Creating dataframe using python dictionary

In [66]:
# Column-oriented records: each key is a column label and each list holds
# that column's values, one entry per student (9 rows).
student = {
    "name": [
        "Himal", "Nirajan", "Dinesh", "Sanjaya", "Arjun",
        "Krishna", "Sita", "Gita", "Rita",
    ],
    "gender": ["M", "M", "M", "M", "M", "M", "F", "F", "F"],
}

In [67]:
df = pd.DataFrame(student)

In [68]:
df

Unnamed: 0,name,gender
0,Himal,M
1,Nirajan,M
2,Dinesh,M
3,Sanjaya,M
4,Arjun,M
5,Krishna,M
6,Sita,F
7,Gita,F
8,Rita,F


In [69]:
df['gender']

0    M
1    M
2    M
3    M
4    M
5    M
6    F
7    F
8    F
Name: gender, dtype: object

In [70]:
# lowercase all gender values
df['gender'] = df['gender'].str.lower()

In [71]:
df

Unnamed: 0,name,gender
0,Himal,m
1,Nirajan,m
2,Dinesh,m
3,Sanjaya,m
4,Arjun,m
5,Krishna,m
6,Sita,f
7,Gita,f
8,Rita,f


In [72]:
len(df)

9

In [73]:
list(range(9))

[0, 1, 2, 3, 4, 5, 6, 7, 8]

In [74]:
# Size the new column from the frame itself instead of a hard-coded 9, so the
# assignment does not raise a length-mismatch error if the row count changes.
df['some_val'] = list(range(len(df)))

In [75]:
df

Unnamed: 0,name,gender,some_val
0,Himal,m,0
1,Nirajan,m,1
2,Dinesh,m,2
3,Sanjaya,m,3
4,Arjun,m,4
5,Krishna,m,5
6,Sita,f,6
7,Gita,f,7
8,Rita,f,8


# Delete a column in dataframe

* pop()
* del

In [76]:
df.head(2)

Unnamed: 0,name,gender,some_val
0,Himal,m,0
1,Nirajan,m,1


In [77]:
df.pop('some_val')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
Name: some_val, dtype: int64

In [78]:
df

Unnamed: 0,name,gender
0,Himal,m
1,Nirajan,m
2,Dinesh,m
3,Sanjaya,m
4,Arjun,m
5,Krishna,m
6,Sita,f
7,Gita,f
8,Rita,f


In [79]:
# del df['gender']   -----> Deletes Gender Column

# Creating pandas DataFrame using pandas series

In [80]:
# Two Series with different indexes: 'one' is labelled a-c, while 'two' is
# labelled a-d, so 'one' has no value for label 'd'.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([4, 5, 6, 7], index=list('abcd')),
}

In [81]:
df = pd.DataFrame(d)

In [82]:
df

Unnamed: 0,one,two
a,1.0,4
b,2.0,5
c,3.0,6
d,,7


In [83]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [84]:
import numpy as np

In [85]:
type(np.nan)

float

# Checking Missing Values in a Pandas DataFrame

In [86]:
df

Unnamed: 0,one,two
a,1.0,4
b,2.0,5
c,3.0,6
d,,7


In [87]:
df.isnull()

Unnamed: 0,one,two
a,False,False
b,False,False
c,False,False
d,True,False


In [88]:
df.isnull().sum()

one    1
two    0
dtype: int64

# Creating csv using pandas dataframe

In [52]:
# Column-oriented table of language popularity; the first 'Trend' entry is
# NaN to stand in for a missing value.
data = {
    'Rank': [1, 2, 3, 4, 5],
    'Language': ['Python', 'Java', 'Javascript', 'C#', 'PHP'],
    'Share': [29.88, 19.05, 8.17, 7.3, 6.15],
    'Trend': [np.nan, -1.8, 0.1, -0.1, -1.0],
}

In [53]:
df = pd.DataFrame(data)

In [54]:
df

Unnamed: 0,Rank,Language,Share,Trend
0,1,Python,29.88,
1,2,Java,19.05,-1.8
2,3,Javascript,8.17,0.1
3,4,C#,7.3,-0.1
4,5,PHP,6.15,-1.0


In [55]:
df.to_csv('somedata.csv', index = False)

# Reading csv files using pandas

In [56]:
# Use the same relative path that to_csv('somedata.csv') wrote to above, so
# the read resolves against the current working directory instead of relying
# on a hard-coded absolute '/content/' directory.
filepath = 'somedata.csv'

In [57]:
import pandas as pd
lang = pd.read_csv(filepath)

In [58]:
lang

Unnamed: 0,Rank,Language,Share,Trend
0,1,Python,29.88,
1,2,Java,19.05,-1.8
2,3,Javascript,8.17,0.1
3,4,C#,7.3,-0.1
4,5,PHP,6.15,-1.0


In [89]:
lang.isnull().sum()

Language    0
Share       0
Trend       1
dtype: int64

In [90]:
lang.head(2)

Unnamed: 0,Language,Share,Trend
0,Python,29.88,
1,Java,19.05,-1.8


In [61]:
lang.tail(2)

Unnamed: 0,Rank,Language,Share,Trend
3,4,C#,7.3,-0.1
4,5,PHP,6.15,-1.0


In [62]:
lang.columns  # the column labels of lang

Index(['Rank', 'Language', 'Share', 'Trend'], dtype='object')

In [63]:
lang.columns[1:]

Index(['Language', 'Share', 'Trend'], dtype='object')

In [64]:
usecols = ['Language', 'Share', 'Trend']

lang = pd.read_csv(filepath, usecols = usecols)

In [65]:
lang

Unnamed: 0,Language,Share,Trend
0,Python,29.88,
1,Java,19.05,-1.8
2,Javascript,8.17,0.1
3,C#,7.3,-0.1
4,PHP,6.15,-1.0


# Creating table in Markdown

| Name | Roll no. |
| :--- | ---:   |
| Nirajan | 1 |
| Ram | 2 |
| Narayan | 3 |
| Hari | 4 |


## How to Write in Markdown?

To create a table in Markdown, see the cell below. I am writing the step-by-step instructions inside a code cell; normally you would do the reverse (instructions in Markdown, the algorithm in code), but writing the table syntax in a Markdown cell would render it as another table.

In [None]:
"""
This works in Markdown only.

# Step 1:    |        |         |      --------> This is the first row. Write " | ", then the column name, and add another " | " to complete the cell.
         You can find " | " on the backslash key, next to the backspace key.

# Step 2: Write " |       | " and then, between the bars, write three dashes (" - "). Like this: | --- |. The number of these cells should match the number of columns in the header row above.

# Step 3: Write " |       | " and then fill in the values between the bars. Like this: | Nirajan |. Repeat this across each row and for every row below.

Here is an example of a Table by Following the steps:

| Name | Age | Gender |
| ---  | --- | ---    |
| Ram | 21 | Male |
| Shyam | 16 | Male |
| Gita | 24 | Female |
| Rita | 53 | Female |

If I copy and paste above table in Markdown, it looks like this:
"""

| Name | Age | Gender |
| ---  | --- | ---    |
| Ram | 21 | Male |
| Shyam | 16 | Male |
| Gita | 24 | Female |
| Rita | 53 | Female |

In [None]:
"""
# In the | --- | part, if you add " : " before " --- ", after it, or on both sides, the values are aligned towards the side where " : " is written.

If | :--- |, then the values align to the left. If | ---: |, then the values align to the right. If | :---: |, then the values align at the centre.

Check the example:

| Name | Age | Gender |
| :---  | :---: | ---: |
| Ram | 21 | Male |
| Shyam | 16 | Male |
| Gita | 24 | Female |
| Rita | 53 | Female |
"""

|S. No.| Name | Age | Gender |
| --- | :---  | :---: | ---: |
| 1 | Ram | 21 | Male |
| 2 | Shyam | 16 | Male |
| 3 | Gita | 24 | Female |
| 4 | Rita | 53 | Female |

![Another One](https://media1.giphy.com/media/3o7WTxyMSVN7lM5I7C/giphy.gif)

| | | |
|---|---|---|