## Pandas Cheat Sheet for Beginners

### Importing pandas

In [2]:
import pandas as pd

### Creating DataFrames

In [3]:
# Dictionary input: every key is a column label and its list holds that column's values
data = dict(Column1=[1, 2, 3], Column2=[4, 5, 6])
dfd = pd.DataFrame(data=data)

# Nested-list input: every inner list is one row, so the labels are passed via `columns`
column_labels = ['Column1', 'Column2', 'Column3']
data = [[1, 2, 3], [4, 5, 6]]
dfl = pd.DataFrame(data=data, columns=column_labels)

In [4]:
dfd  # DataFrame built from the dictionary: keys are the column labels

Unnamed: 0,Column1,Column2
0,1,4
1,2,5
2,3,6


In [5]:
dfl  # DataFrame built from the list of lists: each inner list is one row

Unnamed: 0,Column1,Column2,Column3
0,1,2,3
1,4,5,6


### Reading and Writing Data

In [None]:
# File names used by the round-trips below (both files must already exist to be read)
csv_path = 'file.csv'
excel_path = 'file.xlsx'

# CSV: load the file into a DataFrame ...
df = pd.read_csv(csv_path)

# ... and save it back; index=False leaves the row index out of the file
df.to_csv(csv_path, index=False)

# Excel: load the workbook into a DataFrame ...
df = pd.read_excel(excel_path)

# ... and save it back, again without the row index
df.to_excel(excel_path, index=False)

### Viewing Data

In [34]:
# Small example table: one row per player, one column per statistic
data = dict(
    Player=['LeBron James', 'Stephen Curry', 'Kevin Durant', 'Giannis Antetokounmpo'],
    Points=[27, 24, 29, 28],
    Rebounds=[7, 5, 8, 11],
    Assists=[8, 6, 5, 6],
)

df = pd.DataFrame(data=data)

In [35]:
# Display the first row; head(n) returns the first n rows (5 when n is omitted)
df.head(1)

Unnamed: 0,Player,Points,Rebounds,Assists
0,LeBron James,27,7,8


In [36]:
# Display the last row; tail(n) returns the last n rows (5 when n is omitted)
df.tail(1)

Unnamed: 0,Player,Points,Rebounds,Assists
3,Giannis Antetokounmpo,28,11,6


In [37]:
# Print a structural summary: index range, each column's non-null count and
# dtype, and the approximate memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Player    4 non-null      object
 1   Points    4 non-null      int64 
 2   Rebounds  4 non-null      int64 
 3   Assists   4 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 260.0+ bytes


In [38]:
# Display summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns; the text column Player is left out of the result
df.describe()

Unnamed: 0,Points,Rebounds,Assists
count,4.0,4.0,4.0
mean,27.0,7.75,6.25
std,2.160247,2.5,1.258306
min,24.0,5.0,5.0
25%,26.25,6.5,5.75
50%,27.5,7.5,6.0
75%,28.25,8.75,6.5
max,29.0,11.0,8.0


### Selecting Data

In [39]:
# Selecting a single column by its label; the result is a Series
df['Points']

0    27
1    24
2    29
3    28
Name: Points, dtype: int64

In [40]:
# Selecting multiple columns: pass a list of labels (note the double brackets);
# the result is a DataFrame with the columns in the listed order
df[['Rebounds', 'Assists']]


Unnamed: 0,Rebounds,Assists
0,7,8
1,5,6
2,8,5
3,11,6


In [41]:
# Selecting a row by integer position (iloc is position-based, not label-based);
# a single position returns that row as a Series
df.iloc[0]  # First row

Player      LeBron James
Points                27
Rebounds               7
Assists                8
Name: 0, dtype: object

In [42]:
# Selecting a range of rows by integer position; the slice end is exclusive,
# so 0:2 returns positions 0 and 1 as a DataFrame
df.iloc[0:2]  # First two rows

Unnamed: 0,Player,Points,Rebounds,Assists
0,LeBron James,27,7,8
1,Stephen Curry,24,5,6


In [43]:
# Selecting rows by condition: build a boolean mask (one True/False per row),
# then index the DataFrame with it to keep only the True rows
has_many_rebounds = df['Rebounds'] > 7
df[has_many_rebounds]

Unnamed: 0,Player,Points,Rebounds,Assists
2,Kevin Durant,29,8,5
3,Giannis Antetokounmpo,28,11,6


### Modifying Data

In [44]:
# Adding a new column: element-wise sum of the three stat columns, row by row
points, rebounds, assists = df['Points'], df['Rebounds'], df['Assists']
df['Total'] = points + rebounds + assists

# Show one player's row to check the new column
is_curry = df['Player'] == 'Stephen Curry'
df[is_curry]


Unnamed: 0,Player,Points,Rebounds,Assists,Total
1,Stephen Curry,24,5,6,35


In [45]:
# Renaming columns
# rename() returns a new DataFrame; rebinding `df` to it (instead of using
# inplace=True) keeps the step chainable and avoids silently mutating the
# object that earlier cells displayed. Labels missing from the mapping are
# left unchanged.
df = df.rename(columns={'Total': 'Pts+Rbs+Ats'})
df.iloc[:1]

Unnamed: 0,Player,Points,Rebounds,Assists,Pts+Rbs+Ats
0,LeBron James,27,7,8,42


In [46]:
# Dropping a column
# drop() returns a new DataFrame; rebinding `df` replaces inplace=True, and
# `columns=` states the intent directly instead of relying on axis=1.
df = df.drop(columns='Pts+Rbs+Ats')
df.iloc[2:]

Unnamed: 0,Player,Points,Rebounds,Assists
2,Kevin Durant,29,8,5
3,Giannis Antetokounmpo,28,11,6


### Handling Missing Data

In [47]:
# Adding a new player row
# Assists is unknown for this player. Using float('nan') rather than None gives
# the one-row frame a numeric (float64) Assists column, so the concatenated
# column is float64 by ordinary int/float promotion instead of depending on how
# concat treats an all-missing object column (behavior pandas has deprecated).
new_player_df = pd.DataFrame([{'Player': 'Bronny James', 'Points': 0, 'Rebounds': 0, 'Assists': float('nan')}])
# ignore_index=True renumbers the rows 0..n-1 instead of keeping each frame's own index
df = pd.concat([df, new_player_df], ignore_index=True)
df

Unnamed: 0,Player,Points,Rebounds,Assists
0,LeBron James,27,7,8.0
1,Stephen Curry,24,5,6.0
2,Kevin Durant,29,8,5.0
3,Giannis Antetokounmpo,28,11,6.0
4,Bronny James,0,0,


In [48]:
# Checking for missing values: isnull() marks each missing cell as True, and
# sum() adds those up per column, giving the number of missing values per column
df.isnull().sum()

Player      0
Points      0
Rebounds    0
Assists     1
dtype: int64

In [49]:
# Dropping rows with missing values
# dropna() returns a new DataFrame without the rows that contain a missing
# value; rebinding `df` replaces inplace=True.
df = df.dropna()

# Filling missing values instead (alternative to dropping the rows)
# df = df.fillna(0)
df

Unnamed: 0,Player,Points,Rebounds,Assists
0,LeBron James,27,7,8
1,Stephen Curry,24,5,6
2,Kevin Durant,29,8,5
3,Giannis Antetokounmpo,28,11,6


### Grouping and Aggregating Data

In [50]:
# Templates only: 'Column1' and 'Column2' are placeholders that do not exist in
# this notebook's `df`, so substitute real column names before uncommenting.

# Grouping data by a column and calculating the mean of each group
# df.groupby('Column1').mean()

# Aggregating one column with several functions per group
# df.groupby('Column1').agg({'Column2': ['mean', 'sum']})

### Miscellaneous

In [51]:
# Mean of the Points column, rounded to two decimal places
round(df['Points'].mean(), 2)

np.float64(27.0)

In [52]:
# Sum of all values in the Points column
df['Points'].sum()

np.int64(108)

In [53]:
# Column labels of the DataFrame
df.columns

Index(['Player', 'Points', 'Rebounds', 'Assists'], dtype='object')

In [54]:
# Dimensions of the DataFrame as (number of rows, number of columns)
df.shape

(4, 4)

In [55]:
# Replace one value in the Player column (old value -> new value), then show the
# affected row to confirm the change
nickname = {'Giannis Antetokounmpo': 'Greek Freak'}
df['Player'] = df['Player'].replace(nickname)

is_greek_freak = df['Player'] == 'Greek Freak'
df[is_greek_freak]

Unnamed: 0,Player,Points,Rebounds,Assists
3,Greek Freak,28,11,6


---