# Pandas

### Creating a Series, Index, Data Frame using Padas Library

In [58]:
# Creating Series

import pandas as pd
import numpy as np

# Creating empty series
ser = pd.Series()
print(ser)

# Simple array
data = np.array(['g', 'e', 'e', 'k', 's'])
ser = pd.Series(data)
print(ser)

Series([], dtype: float64)
0    g
1    e
2    e
3    k
4    s
dtype: object


  ser = pd.Series()


### Reading CSV file

In [56]:
import pandas as pd
data = pd.read_csv("continent.csv")
data

Unnamed: 0,Continent
0,Africa
1,Antarctica
2,Asia
3,Australia
4,Europe
5,North America
6,South America


### Creating a Panda Data Frame

In [55]:
import pandas as pd

data = {'Name' : ['Tom', 'Nick', 'Krish', 'Jack'], 'Age' : [20, 21, 19, 18]}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Tom,20
1,Nick,21
2,Krish,19
3,Jack,18


In [54]:
# Creating a dataframe from dictionaries
import pandas as pd 

data_dict = {
    'Name' : ['Alice', 'Bob', 'Charlie', 'David'],
    'Age' : [25, 30, 22, 28],
    'City' : ['New York', 'Los Angeles', 'Chicago', 'Houston']
}

df_from_dict = pd.DataFrame(data_dict)
print("DataFrame from Dictionary :")
df_from_dict

DataFrame from Dictionary :


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,22,Chicago
3,David,28,Houston


In [53]:
# Creating a dataframe from list of dictionaries
import pandas as pd

data_list_of_dicts = [
    {'Name': 'Emily', 'Age': 29, 'City': 'San Francisco'},
    {'Name': 'Frank', 'Age': 35, 'City': 'Seattle'}
]

df_from_list_of_dicts = pd.DataFrame(data_list_of_dicts)
print("\nDataFrame from list of Dictionaries :")
df_from_list_of_dicts


DataFrame from list of Dictionaries :


Unnamed: 0,Name,Age,City
0,Emily,29,San Francisco
1,Frank,35,Seattle


### Selection and Reindexing

In [39]:
info = pd.DataFrame(
    {
        "P": [4, 7, 1, 8, 9],
        "Q": [6, 8, 10, 15,11],
        "R": [17, 13, 12 ,16 , 14],
        "S": [15, 19, 7, 21, 9]
    },
    index = ["Binod", "Parkour", "Susawat", "Barmunda", "Nagraj"]
)

info

Unnamed: 0,P,Q,R,S
Binod,4,6,17,15
Parkour,7,8,13,19
Susawat,1,10,12,7
Barmunda,8,15,16,21
Nagraj,9,11,14,9


In [40]:
# Reindexing with new index values
info.reindex(["A", "B", "C", "D", "E"])

Unnamed: 0,P,Q,R,S
A,,,,
B,,,,
C,,,,
D,,,,
E,,,,


In [44]:
# Filling missing values by 100
info.reindex(["A", "B", "C", "D", "E"], fill_value = 100)

Unnamed: 0,P,Q,R,S
A,100,100,100,100
B,100,100,100,100
C,100,100,100,100
D,100,100,100,100
E,100,100,100,100


### Pandas Sort

In [63]:
# To print in ascending
import numpy as np
import pandas as pd

unsorted_df = pd.DataFrame(np.random.randn(10, 2), index = [1, 4, 6, 2, 3, 5, 9, 8, 8, 7], columns = ['col2', 'col1'])
sorted_df = unsorted_df.sort_index()
sorted_df

Unnamed: 0,300,200
1,0.522806,-0.110053
2,1.230612,-0.266669
3,-0.636141,0.710222
4,0.357833,-0.34491
5,0.66171,-0.182309
6,-1.235146,0.130424
7,0.2074,-0.647929
8,0.685157,0.557144
8,-0.588283,0.535374
9,0.880464,0.718991


In [64]:
# To print in Descending
import numpy as np
import pandas as pd

unsorted_df = pd.DataFrame(np.random.randn(10, 2), index = [1, 4, 6, 2, 3, 5, 9, 8, 8, 7], columns = ['col2', 'col1'])
sorted_df = unsorted_df.sort_index(ascending = False)
sorted_df

Unnamed: 0,300,200
9,0.744863,0.025226
8,0.16493,-1.203812
8,-0.7288,0.44826
7,1.597048,0.238087
6,-0.33042,0.177229
5,-0.59031,-1.788604
4,2.250192,0.123192
3,0.612048,0.830038
2,0.486872,0.571827
1,-0.717898,-0.343579


### Sort the Column

In [66]:
# To print in Descending
import numpy as np
import pandas as pd

unsorted_df = pd.DataFrame(np.random.randn(10, 2), index = [1, 4, 6, 2, 3, 5, 9, 8, 8, 7], columns = ['col2', 'col1'])
sorted_df = unsorted_df.sort_index(axis = 1)
sorted_df

Unnamed: 0,col1,col2
1,2.312505,-0.200372
4,1.380339,1.76145
6,0.204933,0.881055
2,-0.762669,0.886892
3,0.710679,1.944482
5,-0.740211,-0.978287
9,0.537175,-1.986783
8,1.174445,-0.793102
8,0.566519,0.118929
7,0.127292,1.026437


### By Values

In [67]:
import pandas as pd
import numpy as np

unsorted_df = pd.DataFrame({'col1': [2, 1, 1, 1], 'col2': [1, 3, 2, 4]})
sorted_df = unsorted_df.sort_values(by = 'col1')
sorted_df

Unnamed: 0,col1,col2
1,1,3
2,1,2
3,1,4
0,2,1


# Working with text data and statistical function in Pandas

In [70]:
import pandas as pd
import numpy as np
s = pd.Series(['Tom', 'William Rick', 'John', 'Alber@t', np.nan, '1234', 'SteveSith'])
print(s.str.lower())

0             tom
1    william rick
2            john
3         alber@t
4             NaN
5            1234
6       stevesith
dtype: object


In [71]:
import pandas as pd
import numpy as np
s = pd.Series(['Tom', 'William Rick', 'John', 'Alber@t', np.nan, '1234', 'SteveSith'])
print(s.str.upper())

0             TOM
1    WILLIAM RICK
2            JOHN
3         ALBER@T
4             NaN
5            1234
6       STEVESITH
dtype: object


### Statistical Function

In [79]:
import pandas as pd

data = {
    'Math': [90, 67, 89, 78, 89, 67],
    'Science': [92, 87, 89, 78, 89, 67],
    'English': [95, 67, 89, 90, 89, 44]
}

df = pd.DataFrame(data)

print("DataFrame = \n", df)

print("\nSum = \n", df.sum())

DataFrame = 
    Math  Science  English
0    90       92       95
1    67       87       67
2    89       89       89
3    78       78       90
4    89       89       89
5    67       67       44

Sum = 
 Math       480
Science    502
English    474
dtype: int64


In [75]:
import pandas as pd

data = {
    'Math': [90, 67, 89, 78, 89, 67],
    'Science': [92, 87, 89, 78, 89, 67],
    'English': [95, 67, 89, 90, 89, 44]
}

df = pd.DataFrame(data)

print("DataFrame = \n", df)

print("\nSum = \n", df.count())

DataFrame = 
    Math  Science  English
0    90       92       95
1    67       87       67
2    89       89       89
3    78       78       90
4    89       89       89
5    67       67       44

Sum = 
 Math       6
Science    6
English    6
dtype: int64


In [76]:
import pandas as pd

data = {
    'Math': [90, 67, 89, 78, 89, 67],
    'Science': [92, 87, 89, 78, 89, 67],
    'English': [95, 67, 89, 90, 89, 44]
}

df = pd.DataFrame(data)

print("DataFrame = \n", df)

print("\nSum = \n", df.max())

DataFrame = 
    Math  Science  English
0    90       92       95
1    67       87       67
2    89       89       89
3    78       78       90
4    89       89       89
5    67       67       44

Sum = 
 Math       90
Science    92
English    95
dtype: int64


In [81]:
import pandas as pd

data = {
    'Math': [90, 67, 89, 78, 89, 67],
    'Science': [92, 87, 89, 78, 89, 67],
    'English': [95, 67, 89, 90, 89, 44]
}

df = pd.DataFrame(data)

print("DataFrame = \n", df)

print("\nSum = \n", df.median())

DataFrame = 
    Math  Science  English
0    90       92       95
1    67       87       67
2    89       89       89
3    78       78       90
4    89       89       89
5    67       67       44

Sum = 
 Math       83.5
Science    88.0
English    89.0
dtype: float64


In [85]:
# Problem 1 - Creating a series

import pandas as pd

data = pd.Series(
                    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 
                    index = ['day1', 'day2', 'day3', 'day4', 'day5', 'day6', 'day7']
                )
data

day1       Monday
day2      Tuesday
day3    Wednesday
day4     Thursday
day5       Friday
day6     Saturday
day7       Sunday
dtype: object

In [88]:
# Problem 2 - Indexing and slicing series

print(data['day4'])

Thursday
