In [None]:
# Most commonly used functions in pandas

In [4]:
# Import pandas and build the example sales DataFrame used by the cells below
import pandas as pd

data = dict(
    Product=['A', 'B', 'A', 'B', 'A', 'B', 'A', 'A'],
    Sales=[100, 150, 200, 120, 250, 180, 300, 350],
    Region=['North', 'South', 'North', 'South', 'North', 'South', 'North', 'North'],
)

# One column per dictionary key; rows get the default 0..7 integer index
df = pd.DataFrame(data=data)
print(df)

  Product  Sales Region
0       A    100  North
1       B    150  South
2       A    200  North
3       B    120  South
4       A    250  North
5       B    180  South
6       A    300  North
7       A    350  North


In [None]:
# Reads a CSV file into a DataFrame.
# 'file.csv' is a placeholder path: when it does not exist, keep the DataFrame
# built above so the remaining cells still run after "Restart & Run All"
# instead of stopping here with FileNotFoundError.
try:
    df = pd.read_csv('file.csv')
except FileNotFoundError:
    print("'file.csv' not found - keeping the existing DataFrame")

In [6]:
# Displays the first N rows of a DataFrame (here N = 5)
df.head(n=5)

Unnamed: 0,Product,Sales,Region
0,A,100,North
1,B,150,South
2,A,200,North
3,B,120,South
4,A,250,North


In [7]:
# Displays the last N rows of a DataFrame (here N = 5)
df.tail(n=5)

Unnamed: 0,Product,Sales,Region
3,B,120,South
4,A,250,North
5,B,180,South
6,A,300,North
7,A,350,North


In [8]:
# Prints a concise summary of the DataFrame: index range, column names,
# non-null counts (useful for spotting missing values), dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Product  8 non-null      object
 1   Sales    8 non-null      int64 
 2   Region   8 non-null      object
dtypes: int64(1), object(2)
memory usage: 324.0+ bytes


In [9]:
# Generates descriptive statistics (count, mean, std, min, quartiles, max).
# For this DataFrame only the numeric 'Sales' column is summarised.
df.describe()

Unnamed: 0,Sales
count,8.0
mean,206.25
std,87.82084
min,100.0
25%,142.5
50%,190.0
75%,262.5
max,350.0


In [11]:
# Returns the dimensions of the DataFrame as a (rows, columns) tuple
df.shape

(8, 3)

In [12]:
# Returns the column labels of the DataFrame as an Index object
df.columns

Index(['Product', 'Sales', 'Region'], dtype='object')

In [13]:
# Selects the first row by integer position; the row comes back as a Series
# whose index is the column labels
df.iloc[0]

Product        A
Sales        100
Region     North
Name: 0, dtype: object

In [16]:
# Selects all rows (the ':' slice) of the column labelled 'Sales'
df.loc[:, 'Sales']

0    100
1    150
2    200
3    120
4    250
5    180
6    300
7    350
Name: Sales, dtype: int64

In [17]:
# Counts how many times each unique value occurs in a column
df['Region'].value_counts()

Region
North    5
South    3
Name: count, dtype: int64

In [22]:
# Groups the DataFrame by one or more columns and aggregates each group:
# here, the mean of 'Age' within every 'Pclass' group
titanic_data = pd.read_csv(
    'https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv'
)
average_age_by_class = titanic_data.groupby(by='Pclass')['Age'].agg('mean')
print(average_age_by_class)

Pclass
1    38.788981
2    29.868641
3    25.188747
Name: Age, dtype: float64


In [23]:
# Combines two DataFrames by matching their rows on a shared key column

# Left-hand table: ID and Name
df1 = pd.DataFrame(dict(
    ID=[1, 2, 3, 4],
    Name=['Alice', 'Bob', 'Charlie', 'David'],
))

# Right-hand table: ID and Age
df2 = pd.DataFrame(dict(
    ID=[2, 3, 4, 5],
    Age=[25, 30, 35, 40],
))

# Inner merge (the default): keeps only the IDs present in both tables
merged_df = df1.merge(df2, on='ID', how='inner')

# Show the inner-merge result
print(merged_df)

# The `how` argument selects the other merge types: left, right and outer
# Left merge: keeps every row of df1
merged_df_left = df1.merge(df2, on='ID', how='left')

# Right merge: keeps every row of df2
merged_df_right = df1.merge(df2, on='ID', how='right')

# Outer merge: keeps the rows of both tables
merged_df_outer = df1.merge(df2, on='ID', how='outer')

   ID     Name  Age
0   2      Bob   25
1   3  Charlie   30
2   4    David   35


In [25]:
# pd.concat joins two or more DataFrames along a particular axis

# First batch of rows
df1 = pd.DataFrame(dict(
    ID=[1, 2, 3],
    Name=['Alice', 'Bob', 'Charlie'],
))

# Second batch of rows, with the same columns
df2 = pd.DataFrame(dict(
    ID=[4, 5, 6],
    Name=['David', 'Eva', 'Frank'],
))

# Stack the rows (axis=0); ignore_index=True renumbers the result from 0
concatenated_df = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)

# Show the combined table
print(concatenated_df)

   ID     Name
0   1    Alice
1   2      Bob
2   3  Charlie
3   4    David
4   5      Eva
5   6    Frank


In [26]:
# Drops the specified labels from rows or columns.
# Reassigning the result (instead of inplace=True) together with
# errors='ignore' makes this cell safe to re-run: a second run no longer
# raises KeyError because 'Region' has already been removed.
df = df.drop(columns=['Region'], errors='ignore')

In [None]:
# Fills missing values in a DataFrame with a replacement value.
# fillna returns a new DataFrame (shown as the cell output); df itself is not
# modified. 0 is used here as an example fill value - the original cell
# referenced an undefined name and raised NameError.
df.fillna(value=0)

In [None]:
# Saves the DataFrame to disk as a CSV file and as an Excel spreadsheet;
# index=False leaves the row index out of both files
df.to_csv(path_or_buf='output.csv', index=False)
df.to_excel(excel_writer='output.xlsx', index=False)