In [3]:
# DataFrame from a dictionary of equal-length lists
import pandas as pd

# Each key becomes a column label and its list supplies that column's values.
data = {
    'Apple': [3, 2, 0, 1],
    'Orange': [0, 3, 7, 2],
}
# No index is passed, so the rows get the default integer labels.
df = pd.DataFrame(data=data)
print(df)

   Apple  Orange
0      3       0
1      2       3
2      0       7
3      1       2


In [4]:
# DataFrame with a custom (label-based) index
import pandas as pd

data = {
    'Apple': [3, 2, 0, 1],
    'Orange': [0, 3, 7, 2],
}
# One label per row, in the same order as the values in each column list.
df = pd.DataFrame(data=data, index=['Ahmad', 'Ali', 'Rashed', 'Hamza'])
print(df)

# .loc selects by index label; a single label returns that row as a Series.
print("\nRow for Ahmad:")
print(df.loc['Ahmad'])

        Apple  Orange
Ahmad       3       0
Ali         2       3
Rashed      0       7
Hamza       1       2

Row for Ahmad:
Apple     3
Orange    0
Name: Ahmad, dtype: int64


In [5]:
# DataFrame from a list of dictionaries (one dictionary per row)
import pandas as pd

data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
# The first record has no 'c' key, so that cell is filled with NaN.
df = pd.DataFrame(data=data, index=['First', 'Second'])
print(df)

        a   b     c
First   1   2   NaN
Second  5  10  20.0


In [6]:
# DataFrame from a list of dictionaries, restricted to specified columns
import pandas as pd

data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]

# Both requested columns exist as keys in the records; 'c' is left out.
df1 = pd.DataFrame(data=data, index=['First', 'Second'], columns=['a', 'b'])
# 'b1' is not a key in any record, so that column is entirely NaN.
df2 = pd.DataFrame(data=data, index=['First', 'Second'], columns=['a', 'b1'])

print(df1)
print("\n==============================\n")
print(df2)

        a   b
First   1   2
Second  5  10


        a  b1
First   1 NaN
Second  5 NaN


In [7]:
# DataFrame from a dictionary of Series
import pandas as pd

d = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
# The Series are aligned on their index labels; 'one' has no entry for 'd',
# so that cell is NaN.
df = pd.DataFrame(data=d)
print(df)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4


In [8]:
# Add columns to a DataFrame
import pandas as pd

d = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(data=d)

# The assigned Series is aligned on the index; it has no value for 'd', so
# that cell becomes NaN.
df['three'] = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
# Element-wise sum of two existing columns; the NaN in 'one' carries into row 'd'.
df['four'] = df['one'].add(df['two'])
print(df)

   one  two  three  four
a  1.0    1   10.0   2.0
b  2.0    2   20.0   4.0
c  3.0    3   30.0   6.0
d  NaN    4    NaN   NaN


In [9]:
# Delete columns from a DataFrame
import pandas as pd

d = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
    'three': pd.Series([10, 20, 30], index=['a', 'b', 'c']),
}
df = pd.DataFrame(d)
print("Original DataFrame:")
print(df)

# `del` removes the column from df in place. The label below names the column
# that is actually removed ('one'); 'two' is still present afterwards.
del df['one']
print("\nDataFrame after deleting 'one' column:")
print(df)

# pop() also removes the column in place and returns it; the returned Series
# is not needed here.
df.pop('three')
print("\nDataFrame after popping 'three' column:")
print(df)

Original DataFrame:
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN

DataFrame after deleting 'two' column:
   two  three
a    1   10.0
b    2   20.0
c    3   30.0
d    4    NaN

DataFrame after popping 'three' column:
   two
a    1
b    2
c    3
d    4


In [10]:
# Slice rows from a DataFrame by position
import pandas as pd

d = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(d)
print("Original DataFrame:")
print(df)

# .iloc makes the positional intent explicit on this string-labelled frame.
# The end bound is exclusive, so 2:4 selects positions 2 and 3, i.e. the rows
# labelled 'c' and 'd'.
print("\nSlicing rows 2 to 4:")
sliced = df.iloc[2:4]
print(sliced)


Original DataFrame:
   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4

Slicing rows 2 to 4:
   one  two
c  3.0    3
d  NaN    4


In [11]:
# Add and delete rows from a DataFrame
import pandas as pd

d = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(data=d)
print("Original DataFrame:")
print(df)

# NOTE(review): df2's columns ('a', 'b') do not match df's ('one', 'two'), so
# the concatenated frame holds the union of the columns with NaN padding rather
# than extending the existing columns -- confirm this is the intended demo.
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])
df = pd.concat([df, df2], axis=0)

print("\nAfter appending df2:")
print(df)

# drop() matches on index label, so this removes the row labelled 0 (the first
# row that came from df2), not the row at position 0.
df = df.drop(index=0)
print("\nAfter deleting row '0':")
print(df)

Original DataFrame:
   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4

After appending df2:
   one  two    a    b
a  1.0  1.0  NaN  NaN
b  2.0  2.0  NaN  NaN
c  3.0  3.0  NaN  NaN
d  NaN  4.0  NaN  NaN
0  NaN  NaN  5.0  6.0
1  NaN  NaN  7.0  8.0

After deleting row '0':
   one  two    a    b
a  1.0  1.0  NaN  NaN
b  2.0  2.0  NaN  NaN
c  3.0  3.0  NaN  NaN
d  NaN  4.0  NaN  NaN
1  NaN  NaN  7.0  8.0


In [12]:
# DataFrame reindexing
df1 = pd.DataFrame(
    data={
        "A": [1, 5, 3, 4, 2],
        "B": [3, 2, 4, 3, 4],
        "C": [2, 2, 7, 3, 4],
        "D": [4, 3, 6, 12, 7],
    },
    index=["A1", "A2", "A3", "A4", "A5"],
)
df2 = pd.DataFrame(
    data={
        "A": [10, 11, 7, 8, 5],
        "B": [21, 5, 32, 4, 6],
        "C": [11, 21, 23, 7, 9],
        "D": [1, 5, 3, 8, 6],
    },
    index=["A1", "A3", "A4", "A7", "A8"],
)

print("DataFrame 1:")
print(df1)

print("\nDataFrame 2:")
print(df2)

# Conform df1 to df2's row and column labels: labels present in both keep
# df1's values, labels found only in df2 (A7, A8) become all-NaN rows, and
# labels found only in df1 (A2, A5) are dropped.
df1 = df1.reindex(index=df2.index, columns=df2.columns)

print("\nReindexed DataFrame 1:")
print(df1)


DataFrame 1:
    A  B  C   D
A1  1  3  2   4
A2  5  2  2   3
A3  3  4  7   6
A4  4  3  3  12
A5  2  4  4   7

DataFrame 2:
     A   B   C  D
A1  10  21  11  1
A3  11   5  21  5
A4   7  32  23  3
A7   8   4   7  8
A8   5   6   9  6

Reindexed DataFrame 1:
      A    B    C     D
A1  1.0  3.0  2.0   4.0
A3  3.0  4.0  7.0   6.0
A4  4.0  3.0  3.0  12.0
A7  NaN  NaN  NaN   NaN
A8  NaN  NaN  NaN   NaN


In [13]:
# Concatenating DataFrames
import pandas as pd

df1 = pd.DataFrame(
    data={"Name": ["A", "B"], "SSN": [10, 20], "marks": [90, 95]},
)
df2 = pd.DataFrame(
    data={"Name": ["B", "C"], "SSN": [25, 30], "marks": [80, 97]},
)

# Stack df2's rows below df1's. Each frame keeps its own row labels, so the
# resulting index repeats 0 and 1.
df3 = pd.concat([df1, df2], axis=0)
print(df3)

  Name  SSN  marks
0    A   10     90
1    B   20     95
0    B   25     80
1    C   30     97


In [14]:
# Handling categorical data in a DataFrame
import pandas as pd
import numpy as np

# No categories are given, so they are inferred from the distinct values.
cat = pd.Categorical(values=["a", "b", "c", "a", "b", "c"])
print(cat)

# Explicit categories: "b" is declared but never observed, and NaN is treated
# as a missing value rather than a category.
cat = pd.Categorical(values=["a", "c", "c", np.nan], categories=["b", "a", "c"])
df = pd.DataFrame(data={"cat": cat, "s": ["a", "c", "c", np.nan]})

# Summary for the whole frame first, then for the categorical column alone.
print(df.describe())
print(df["cat"].describe())

['a', 'b', 'c', 'a', 'b', 'c']
Categories (3, object): ['a', 'b', 'c']
       cat  s
count    3  3
unique   2  2
top      c  c
freq     2  2
count     3
unique    2
top       c
freq      2
Name: cat, dtype: object


In [15]:
# Reading a CSV file into a DataFrame
import pandas as pd

# index_col=0 uses the file's first column as the row labels instead of
# reading it as a data column.
df = pd.read_csv(
    'data.csv',
    index_col=0,
)
print(df)


        Apple  Orange
Ahmad       3       0
Ali         2       3
Rashed      0       7
Hamza       1       2


In [16]:
# Reading a JSON file into a DataFrame
import pandas as pd

# Path is relative to the notebook's working directory.
df = pd.read_json(
    'data.json',
)
print(df)


        apples  oranges
Ahmad        3        0
Ali          2        3
Rashed       0        7
Hamza        1        2


In [17]:
# Handling duplicate rows in a DataFrame
import pandas as pd

data = {
    'col1': [1, 2, 2, 3],
    'col2': ['a', 'b', 'b', 'c'],
}
df = pd.DataFrame(data=data)
print("Original DataFrame:")
print(df)

# Stack the frame on top of itself; ignore_index renumbers the rows 0..7.
temp_df = pd.concat([df, df], axis=0, ignore_index=True)
print("After appending")
print(temp_df)
print("Shape before removing duplicates:", temp_df.shape)

# Keep only the first occurrence of each distinct row. Surviving rows retain
# their labels, so the index is no longer consecutive.
temp_df = temp_df.drop_duplicates(keep='first')
print("After removing duplicates:")
print(temp_df)
print("Shape after removing duplicates:", temp_df.shape)



Original DataFrame:
   col1 col2
0     1    a
1     2    b
2     2    b
3     3    c
After appending
   col1 col2
0     1    a
1     2    b
2     2    b
3     3    c
4     1    a
5     2    b
6     2    b
7     3    c
Shape before removing duplicates: (8, 2)
After removing duplicates:
   col1 col2
0     1    a
1     2    b
3     3    c
Shape after removing duplicates: (3, 2)


In [18]:
# Describing a DataFrame
import pandas as pd

data = {
    'Scores': [90, 80, 70, 60, 85],
}
df = pd.DataFrame(data=data)

# describe() reports count, mean, std, min, the quartiles and max for the
# numeric column.
print(df.describe())


          Scores
count   5.000000
mean   77.000000
std    12.041595
min    60.000000
25%    70.000000
50%    80.000000
75%    85.000000
max    90.000000
