In [1]:
import pandas as pd
import numpy as np

In [2]:
#empty dataframe
df = pd.DataFrame()
df

In [3]:
#dataframe from numpy array 
data = np.array([[1,2,3],[4,5,6],[7,8,9]])
df = pd.DataFrame(data, columns=['A', 'B', 'C'], index=['row1', 'row2', 'row3'])
df

Unnamed: 0,A,B,C
row1,1,2,3
row2,4,5,6
row3,7,8,9


In [4]:
#create dataframe from dictionary
DATA = {
    'A': [1, 4, 7],
    'B': [2, 5, 8],
    'C': [3, 6, 9]
}
df = pd.DataFrame(DATA, index=['row1', 'row2', 'row3'])
df

Unnamed: 0,A,B,C
row1,1,2,3
row2,4,5,6
row3,7,8,9


In [62]:
#create dataframe from list of lists using zip 
Name = ['Alice', 'Bob', 'Charlie']
age = [25, 30, 35]

df = pd.DataFrame(list(zip(Name, age)), columns=['Name', 'Age'])
df

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35


In [6]:
#create dataframe from list of lists

data = [['1', '2', '3'],['1', '2', '3'],['1', '2', '3'],['1', '2', '3']]
df = pd.DataFrame(data, columns=['a', 'b', 'c'])
df['c'] = df['c'].astype(float)
df

Unnamed: 0,a,b,c
0,1,2,3.0
1,1,2,3.0
2,1,2,3.0
3,1,2,3.0


In [7]:
#create dataframe using list of series
d = {'one': pd.Series([1,2,3],index=['a', 'b', 'c']),
     'two' : pd.Series([4,5,6],index=['a', 'b', 'c'])}

df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1,4
b,2,5
c,3,6


In [8]:
import pandas as pd

# Initialize data to Dicts of series.
d = {'one': pd.Series([10, 20, 30, 40],
                      index=['a', 'b', 'c', 'd']),
     'two': pd.Series([10, 20, 30, 40],
                      index=['a', 'b', 'c', 'd'])}

# creates Dataframe.
df = pd.DataFrame(d)

print(df)

   one  two
a   10   10
b   20   20
c   30   30
d   40   40


In [55]:
#create dataframe from list of lists
import pandas as pd
data = [['1', '2', '3'],['1', None, '3'],['1', '2', '3'],['1', '2', '3']]
df = pd.DataFrame(data, columns=['a', 'b', 'c'], index=['row1', 'row2', 'row3', 'row4'])
df['c'] = df['c'].astype(float)
df

Unnamed: 0,a,b,c
row1,1,2.0,3.0
row2,1,,3.0
row3,1,2.0,3.0
row4,1,2.0,3.0


In [56]:
df.index #accesing the index of the dataframe

Index(['row1', 'row2', 'row3', 'row4'], dtype='object')

In [58]:
print(df.index.name)
df.index.values

None


array(['row1', 'row2', 'row3', 'row4'], dtype=object)

In [None]:
# set custom index
Name = ['Alice', 'Bob', 'Charlie']
age = [25, 30, 35]

df = pd.DataFrame(list(zip(Name, age)), columns=['Name', 'Age'])

index = df.set_index('Name')
index


Unnamed: 0_level_0,Age
Name,Unnamed: 1_level_1
Alice,25
Bob,30
Charlie,35


In [None]:
#reset index
index1 = df.reset_index()
index1

Unnamed: 0,index,Name,Age
0,0,Alice,25
1,1,Bob,30
2,2,Charlie,35


In [37]:
data = {'Name': ['John', 'Alice', 'Bob', 'Eve', 'Charlie'],
        'Age': [25, 30, 22, 35, 28],
        'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
        'Salary': [50000, 55000, 40000, 70000, 48000]}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Gender,Salary
0,John,25,Male,50000
1,Alice,30,Female,55000
2,Bob,22,Male,40000
3,Eve,35,Female,70000
4,Charlie,28,Male,48000


In [43]:
df.sample(3)  # Randomly sample 3 rows from the DataFrame

Unnamed: 0,Name,Age,Gender,Salary
0,John,25,Male,50000
2,Bob,22,Male,40000
1,Alice,30,Female,55000


In [44]:
df.sort_index()

Unnamed: 0,Name,Age,Gender,Salary
0,John,25,Male,50000
1,Alice,30,Female,55000
2,Bob,22,Male,40000
3,Eve,35,Female,70000
4,Charlie,28,Male,48000


In [45]:
df.sort_index(axis=1)

Unnamed: 0,Age,Gender,Name,Salary
0,25,Male,John,50000
1,30,Female,Alice,55000
2,22,Male,Bob,40000
3,35,Female,Eve,70000
4,28,Male,Charlie,48000


### regular expressions

In [None]:
import re 

In [66]:
data = {
    'name': ['John Smith', 'Jane Doe', 'Alice Johnson', 'bob_99'],
    'email': ['john@example.com', 'jane@abc.net', 'alice123@my.org', 'bob99@site.com'],
    'phone': ['123-456-7890', '987.654.3210', '(123)456-7890', '4567891230']
}

df = pd.DataFrame(data)
df

Unnamed: 0,name,email,phone
0,John Smith,john@example.com,123-456-7890
1,Jane Doe,jane@abc.net,987.654.3210
2,Alice Johnson,alice123@my.org,(123)456-7890
3,bob_99,bob99@site.com,4567891230


In [68]:
df[df['email'].str.contains(r'\.com$', regex=True)]

Unnamed: 0,name,email,phone
0,John Smith,john@example.com,123-456-7890
3,bob_99,bob99@site.com,4567891230


In [70]:
df['username'] = df['email'].str.extract(r'^(\w+)')
df['username']

0        john
1        jane
2    alice123
3       bob99
Name: username, dtype: object

In [72]:
df['email'].str.extract(r'@(.+)\.')

Unnamed: 0,0
0,example
1,abc
2,my
3,site


In [80]:
df['phone'].str.replace(r'[\.\-\(\)]', '', regex=True)

0    1234567890
1    9876543210
2    1234567890
3    4567891230
Name: phone, dtype: object

In [None]:
df[df['name'].str.match(r'^[A-Z][a-zA-Z ]+$')]


Unnamed: 0,name,email,phone,username
0,John Smith,john@example.com,123-456-7890,john
1,Jane Doe,jane@abc.net,987.654.3210,jane
2,Alice Johnson,alice123@my.org,(123)456-7890,alice123


In [111]:
data = {
    'Name' : ['John', 'Alice', 'Bob', 'Eve'],
    'math' : [85, 90, 78, 92],
    'science' : [88, 95, 80, 89]
}

df = pd.DataFrame(data)
df.set_index('Name', inplace=True)
df

Unnamed: 0_level_0,math,science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
John,85,88
Alice,90,95
Bob,78,80
Eve,92,89


In [100]:
df1 = df.stack()
df1

Name          
John   math       85
       science    88
Alice  math       90
       science    95
Bob    math       78
       science    80
Eve    math       92
       science    89
dtype: int64

In [101]:
df1.unstack()

Unnamed: 0_level_0,math,science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
John,85,88
Alice,90,95
Bob,78,80
Eve,92,89


In [112]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob'],
    'Math': [90, 85],
    'Science': [95, 80]
})

pd.melt(df, id_vars=['Name'], var_name='sub', value_name='marks')
# melted_df = pd.melt(df, id_vars=['Name'], var_name='Subject', value_name='Score')

Unnamed: 0,Name,sub,marks
0,Alice,Math,90
1,Bob,Math,85
2,Alice,Science,95
3,Bob,Science,80
