In [23]:
import pandas as pd

In [3]:
# Sample people table: three parallel columns, one entry per person.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'San Francisco', 'Los Angeles'],
)
df = pd.DataFrame(data)

In [4]:
# Show the rows ordered by 'Age', highest first. The sorted frame is only
# displayed here; it is not assigned back to df.
df.sort_values(by='Age', ascending=False)

Unnamed: 0,Name,Age,City
2,Charlie,35,Los Angeles
1,Bob,30,San Francisco
0,Alice,25,New York


In [6]:
# Arithmetic mean of the 'Age' column.
df['Age'].mean()

30.0

In [7]:
# Renaming columns
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
}

df = pd.DataFrame(data)

# Map each old column label to its new one. rename() returns a new frame, so
# the result is assigned back rather than mutating with inplace=True; this
# keeps the step chainable and makes the data flow explicit.
df = df.rename(columns={'A': 'Column_A', 'B': 'Column_B', 'C': 'Column_C'})
print(df)

   Column_A  Column_B  Column_C
0         1         4         7
1         2         5         8
2         3         6         9


In [31]:
# Concatenating DataFrames
data1 = dict(Name=['Alice', 'Bob'], Age=[25, 30])
data2 = dict(Name=['Charlie', 'David'], Age=[35, 40])

df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)
frames = [df1, df2]

# Stack the frames vertically. Each frame keeps its own row labels, so the
# labels 0 and 1 appear twice in the result.
df_concatenated = pd.concat(frames)
print(df_concatenated)

print("\n")

# Same stacking, but discard the original labels and number the rows 0..3.
df_concatenated = pd.concat(frames, ignore_index=True)
print(df_concatenated)

      Name  Age
0    Alice   25
1      Bob   30
0  Charlie   35
1    David   40


      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [35]:
# Merging and joining DataFrames
data1 = {
    'ID': [1, 2, 3, 4],
    'Name': ['Alice', 'Bob', 'Charlie', 'Sarah']
}

data2 = {
    'ID': [1, 2, 3],
    'Age': [25, 30, 35]
}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Merge on the shared 'ID' column. An inner merge keeps only IDs present in
# both frames, so ID 4 (which has no Age) is dropped.
# Other values for `how`: 'left', 'right', 'outer', 'cross'.
df_merged = pd.merge(df1, df2, how='inner', on='ID')
print(df_merged)

print("\n")

# join() aligns rows on the index, so move 'ID' into the index first.
# The result is assigned back instead of using inplace=True.
df1 = df1.set_index('ID')
df2 = df2.set_index('ID')

# join() defaults to a left join: every row of df1 is kept, and ID 4 gets NaN
# for Age. The NaN is why the Age column is displayed as float.
joined_df = df1.join(df2)
print(joined_df)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   35


       Name   Age
ID               
1     Alice  25.0
2       Bob  30.0
3   Charlie  35.0
4     Sarah   NaN


In [28]:
# Grouping and aggregating
data = dict(
    Category=['A', 'A', 'B', 'B', 'A', 'B'],
    Value=[10, 20, 30, 40, 50, 60],
)
df = pd.DataFrame(data)

# One GroupBy object, reused for every aggregation below.
group_by_category = df.groupby('Category')

# Per-category mean
category_mean = group_by_category.mean()
print(category_mean)

# Per-category standard deviation
category_std = group_by_category.std()
print(category_std)

# Per-category summary statistics (count, mean, std, min, quartiles, max)
category_summary = group_by_category.describe()
print(category_summary)

# Per-category total of the 'Value' column
grouped_df = group_by_category.agg({'Value': 'sum'})
print(grouped_df)

              Value
Category           
A         26.666667
B         43.333333
              Value
Category           
A         20.816660
B         15.275252
         Value                                                    
         count       mean        std   min   25%   50%   75%   max
Category                                                          
A          3.0  26.666667  20.816660  10.0  15.0  20.0  35.0  50.0
B          3.0  43.333333  15.275252  30.0  35.0  40.0  50.0  60.0
          Value
Category       
A            80
B           130


In [38]:
# MultiIndex
# (pandas is already imported as `pd` in the notebook's first cell, so it is
# not re-imported here.)

# (letter, number) pairs for the two index levels, in row order:
# ('A', 'one'), ('A', 'two'), ('B', 'one'), ..., ('C', 'two')
index_tuples = [
    (letter, number)
    for letter in ('A', 'B', 'C')
    for number in ('one', 'two')
]

# Build a two-level index from the pairs and name the levels
multi_index = pd.MultiIndex.from_tuples(index_tuples, names=['Letter', 'Number'])

# One 'Value' per (Letter, Number) pair
data = {'Value': [1, 2, 3, 4, 5, 6]}
df = pd.DataFrame(data, index=multi_index)

print(df)


               Value
Letter Number       
A      one         1
       two         2
B      one         3
       two         4
C      one         5
       two         6


In [39]:
# Names of the index levels, outer level first.
df.index.names

FrozenList(['Letter', 'Number'])

In [40]:
# Two chained lookups: first the rows under outer label 'A', then the row
# labelled 'one' within that sub-frame.
df.loc['A'].loc['one']

Value    1
Name: one, dtype: int64

In [41]:
# Add a second value column, one entry per existing row.
df = df.assign(Value2=[9, 8, 7, 6, 5, 4])

In [42]:
# Display the whole frame (it is only six rows).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Value2
Letter,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
A,one,1,9
A,two,2,8
B,one,3,7
B,two,4,6
C,one,5,5
C,two,6,4


In [44]:
# Single-step lookup: the row key is the (Letter, Number) tuple and the column
# is 'Value2'. This replaces three chained indexing operations with one .loc
# call and returns the same scalar.
df.loc[('A', 'one'), 'Value2']

9

In [22]:
# Cross-section: keep every row whose 'Number' level equals 'one'; that level
# is dropped from the result, leaving 'Letter' as the index.
df.xs('one', level='Number')

Unnamed: 0_level_0,Value,Value2
Letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,1,9
B,3,7
C,5,5
