In [1]:
import pandas as pd

### melt()

In [2]:
# Wide-format grade table: one row per student, one column per subject.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Math=[85, 72, 90],
    Physics=[78, 90, 82],
    Chemistry=[92, 88, 78],
)

df = pd.DataFrame(data)
print(df)

      Name  Math  Physics  Chemistry
0    Alice    85       78         92
1      Bob    72       90         88
2  Charlie    90       82         78


In [3]:
# Reshape wide -> long: 'Name' stays as the identifier column, every other
# column name moves into 'Subject' and its value into 'Score'.
melted_df = df.melt(
    id_vars=['Name'],
    var_name='Subject',
    value_name='Score',
)
print(melted_df)

      Name    Subject  Score
0    Alice       Math     85
1      Bob       Math     72
2  Charlie       Math     90
3    Alice    Physics     78
4      Bob    Physics     90
5  Charlie    Physics     82
6    Alice  Chemistry     92
7      Bob  Chemistry     88
8  Charlie  Chemistry     78


### concat()

In [4]:
# Two small frames that share the same columns ('A' and 'B').
data1 = dict(A=[1, 2, 3], B=[4, 5, 6])
data2 = dict(A=[7, 8, 9], B=[10, 11, 12])
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Stack df2 underneath df1 (row-wise); each frame keeps its own row labels,
# so the labels 0..2 appear twice in the result.
concatenated_df = pd.concat([df1, df2], axis='index')

print("DataFrame 1:", df1, sep="\n")
print("\nDataFrame 2:", df2, sep="\n")
print("\nConcatenated DataFrame:", concatenated_df, sep="\n")

DataFrame 1:
   A  B
0  1  4
1  2  5
2  3  6

DataFrame 2:
   A   B
0  7  10
1  8  11
2  9  12

Concatenated DataFrame:
   A   B
0  1   4
1  2   5
2  3   6
0  7  10
1  8  11
2  9  12


### pivot()

In [5]:
# Long-format input for pivot(): each ('foo', 'bar') pair occurs exactly once.
df = pd.DataFrame(
    dict(
        foo=['one', 'one', 'one', 'two', 'two', 'two'],
        bar=['A', 'B', 'C', 'A', 'B', 'C'],
        baz=[1, 2, 3, 4, 5, 6],
        zoo=['x', 'y', 'z', 'q', 'w', 't'],
    )
)

In [6]:
# Reshape long -> wide: one row per 'foo' value, one column per 'bar' value,
# with the cells filled from 'baz'.
df.pivot(
    columns='bar',
    index='foo',
    values='baz',
)

bar  A  B  C
foo
one  1  2  3
two  4  5  6


### sort_values()

In [7]:
# Small employee table used to demonstrate DataFrame.sort_values().
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 32, 28, 22, 30],
    Salary=[60000, 75000, 65000, 50000, 80000],
)
df = pd.DataFrame(data)

# Each call returns a new, reordered frame; `df` itself is left untouched.
sorted_df_age_asc = df.sort_values('Age', ascending=True)          # youngest first
sorted_df_salary_desc = df.sort_values('Salary', ascending=False)  # highest paid first

print("Original DataFrame:", df, sep="\n")
print("\nSorted by Age (ascending):", sorted_df_age_asc, sep="\n")
print("\nSorted by Salary (descending):", sorted_df_salary_desc, sep="\n")

# The same idea on a Series: ages labelled by person name.
ages = pd.Series({'Alice': 25, 'Bob': 32, 'Charlie': 28, 'David': 22, 'Eve': 30})

# Largest value first; the name labels travel with their values.
sorted_ages_desc = ages.sort_values(ascending=False)

print("\nOriginal Series:", ages, sep="\n")
print("\nSorted Series (descending):", sorted_ages_desc, sep="\n")

Original DataFrame:
      Name  Age  Salary
0    Alice   25   60000
1      Bob   32   75000
2  Charlie   28   65000
3    David   22   50000
4      Eve   30   80000

Sorted by Age (ascending):
      Name  Age  Salary
3    David   22   50000
0    Alice   25   60000
2  Charlie   28   65000
4      Eve   30   80000
1      Bob   32   75000

Sorted by Salary (descending):
      Name  Age  Salary
4      Eve   30   80000
1      Bob   32   75000
2  Charlie   28   65000
0    Alice   25   60000
3    David   22   50000

Original Series:
Alice      25
Bob        32
Charlie    28
David      22
Eve        30
dtype: int64

Sorted Series (descending):
Bob        32
Eve        30
Charlie    28
Alice      25
David      22
dtype: int64


### rename()

In [8]:
# Three-column frame with the default integer row labels 0..2.
data = dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9])
df = pd.DataFrame(data)

print("Original DataFrame:", df, sep="\n")

# Old column label -> new column label.
new_column_names = {
    'A': 'Column_1',
    'B': 'Column_2',
    'C': 'Column_3',
}
# rename() returns a new frame; `df` keeps its original labels.
df_renamed_columns = df.rename(new_column_names, axis='columns')

print("\nDataFrame after renaming columns:", df_renamed_columns, sep="\n")

# Old row label -> new row label.
new_index_names = {
    0: 'Row_1',
    1: 'Row_2',
    2: 'Row_3',
}
df_renamed_index = df.rename(new_index_names, axis='index')

print("\nDataFrame after renaming index:", df_renamed_index, sep="\n")

Original DataFrame:
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9

DataFrame after renaming columns:
   Column_1  Column_2  Column_3
0         1         4         7
1         2         5         8
2         3         6         9

DataFrame after renaming index:
       A  B  C
Row_1  1  4  7
Row_2  2  5  8
Row_3  3  6  9


### Other notebook below

### drop()

In [9]:
# Four people with two attributes each; rows are labelled 0..3 by default.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 22, 28],
    Gender=['Female', 'Male', 'Male', 'Male'],
)

df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")

# Remove the row whose label is 2; the remaining rows keep their labels.
df_dropped_row = df.drop(index=2)
print("\nDataFrame after dropping row with index 2:", df_dropped_row, sep="\n")

# Remove the 'Age' column; `df` itself is not modified by either call.
df_dropped_column = df.drop(columns='Age')
print("\nDataFrame after dropping 'Age' column:", df_dropped_column, sep="\n")

Original DataFrame:
      Name  Age  Gender
0    Alice   25  Female
1      Bob   30    Male
2  Charlie   22    Male
3    David   28    Male

DataFrame after dropping row with index 2:
    Name  Age  Gender
0  Alice   25  Female
1    Bob   30    Male
3  David   28    Male

DataFrame after dropping 'Age' column:
      Name  Gender
0    Alice  Female
1      Bob    Male
2  Charlie    Male
3    David    Male


### Extract rows that meet logical criteria

In [10]:
# Two integer columns, ten rows each.
data = dict(
    Column1=[12, 45, 67, 23, 89, 34, 56, 78, 90, 31],
    Column2=[56, 78, 32, 90, 12, 54, 76, 98, 21, 43],
)

df = pd.DataFrame(data)

# Show the full frame first so the filtered rows can be compared against it.
print(df)

# Boolean mask: keep only the rows where Column1 is strictly greater than 60.
df[df['Column1'].gt(60)]

   Column1  Column2
0       12       56
1       45       78
2       67       32
3       23       90
4       89       12
5       34       54
6       56       76
7       78       98
8       90       21
9       31       43


   Column1  Column2
2       67       32
4       89       12
7       78       98
8       90       21


### drop_duplicates()

In [11]:
# Rows 5..9 repeat rows 0..4 exactly, so half of the frame is duplicated.
data = dict(
    Column1=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
    Column2=[6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
)

df = pd.DataFrame(data)
print("Original DataFrame", df, sep="\n")

# Keep the first occurrence of every distinct row; later repeats are removed
# and the surviving rows retain their original labels.
df = df.drop_duplicates(keep='first')
print("\nDataFrame without duplicates", df, sep="\n")

Original DataFrame
   Column1  Column2
0        1        6
1        2        7
2        3        8
3        4        9
4        5       10
5        1        6
6        2        7
7        3        8
8        4        9
9        5       10

DataFrame without duplicates
   Column1  Column2
0        1        6
1        2        7
2        3        8
3        4        9
4        5       10


### sample()

In [12]:
# Fixed seed for the random draw below, so that re-running the notebook
# (Restart Kernel -> Run All) selects the same rows every time.
SAMPLE_SEED = 42

data = {
    'Column1': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
    'Column2': [6, 7, 8, 9, 10, 6, 7, 8, 9, 10]
}

print("Original DataFrame")
df = pd.DataFrame(data)
print(df)

# frac=0.2 draws 20% of the 10 rows (i.e. 2 rows) without replacement;
# random_state pins the draw so the result is reproducible.
df = df.sample(frac=0.2, random_state=SAMPLE_SEED)
print("\nGet a fraction from the original DataFrame")
print(df)

Original DataFrame
   Column1  Column2
0        1        6
1        2        7
2        3        8
3        4        9
4        5       10
5        1        6
6        2        7
7        3        8
8        4        9
9        5       10

Get a fraction from the original DataFrame
   Column1  Column2
6        2        7
8        4        9
