In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample forum-member records: every key is a column name and every value
# holds that column's five entries (one per user).
forum_users = {
    'User ID': np.arange(1, 6),
    'Username': ['nicky_a', 'mike_sky', 'anymike', 'mcast', 'charlie'],
    # The trailing None is a missing value; pandas stores it as NaN, which
    # is why the column is displayed as floats (18.0, 35.0, ...).
    'Age': [18, 35, 25, 38, None],
    'Joined Date': pd.to_datetime([
        '2032-01-01',
        '2032-02-15',
        '2032-04-25',
        '2032-06-21',
        '2032-09-15',
    ]),
    'Total Posts': [150, 230, 80, 420, 310],
    'Reputation': [500, 720, 200, 940, 500],
}

# Build the frame used by every cell below and display it.
df = pd.DataFrame(data=forum_users)
df

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
1,2,mike_sky,35.0,2032-02-15,230,720
2,3,anymike,25.0,2032-04-25,80,200
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


## Filtering

In [3]:
# Column subset: all rows, only the Username and Age columns.
df.loc[:, ['Username', 'Age']]

Unnamed: 0,Username,Age
0,nicky_a,18.0
1,mike_sky,35.0
2,anymike,25.0
3,mcast,38.0
4,charlie,


In [4]:
# Label-based lookup of the row whose index label is 3; the result is a
# Series keyed by column name.
df.loc[3, :]

User ID                          4
Username                     mcast
Age                           38.0
Joined Date    2032-06-21 00:00:00
Total Posts                    420
Reputation                     940
Name: 3, dtype: object

In [5]:
# Label-based slice: unlike positional slicing, both endpoints (2 and 3)
# are included.
df.loc[2:3, :]

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
2,3,anymike,25.0,2032-04-25,80,200
3,4,mcast,38.0,2032-06-21,420,940


In [6]:
# Rows labelled 2 through 3 (inclusive), restricted to three columns.
selected_columns = ['Username', 'Joined Date', 'Reputation']
df.loc[2:3, selected_columns]

Unnamed: 0,Username,Joined Date,Reputation
2,anymike,2032-04-25,200
3,mcast,2032-06-21,940


In [12]:
# Positional selection: every row, columns at positions 1, 2 and 3
# (Username, Age, Joined Date).
df.iloc[:, [1, 2, 3]]

Unnamed: 0,Username,Age,Joined Date
0,nicky_a,18.0,2032-01-01
1,mike_sky,35.0,2032-02-15
2,anymike,25.0,2032-04-25
3,mcast,38.0,2032-06-21
4,charlie,,2032-09-15


In [15]:
# Boolean-mask filter: keep users with at least 300 posts.
df[df['Total Posts'].ge(300)]

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


In [18]:
# Keep users whose reputation is 500 or higher.
df.query('Reputation >= 500')

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
1,2,mike_sky,35.0,2032-02-15,230,720
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


In [24]:
# Both conditions must hold. A missing Age compares as False, so the user
# with 310 posts but no recorded age is excluded.
has_many_posts = df['Total Posts'] >= 300
is_25_or_older = df['Age'] >= 25
df[has_many_posts & is_25_or_older]

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
3,4,mcast,38.0,2032-06-21,420,940


In [29]:
# Either condition is enough: at least 400 posts, or aged 20 and under.
# A missing Age compares as False, so it cannot satisfy the age test.
has_400_posts = df['Total Posts'] >= 400
is_20_or_younger = df['Age'] <= 20
df[has_400_posts | is_20_or_younger]

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
3,4,mcast,38.0,2032-06-21,420,940


In [33]:
# Element-wise membership test: True where Reputation equals one of the
# listed values. The result is a boolean Series, not a filtered frame.
wanted_reputations = [300, 500]
df['Reputation'].isin(wanted_reputations)

0     True
1    False
2    False
3    False
4     True
Name: Reputation, dtype: bool

In [38]:
# Users whose post count lies in the half-open interval [200, 350).
# A bounds comparison states the interval directly and, unlike membership in
# range(200, 350), also matches non-integer values that fall inside it.
# inclusive='left' keeps 200 and excludes 350, mirroring range semantics.
df[df['Total Posts'].between(200, 350, inclusive='left')]

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
1,2,mike_sky,35.0,2032-02-15,230,720
4,5,charlie,,2032-09-15,310,500


In [39]:
# Daily timestamps from 1 March 2032 through 1 August 2032, with both
# endpoints included (154 days in total).
date_range = pd.date_range('2032-03-01', '2032-08-01', freq='D')
date_range

DatetimeIndex(['2032-03-01', '2032-03-02', '2032-03-03', '2032-03-04',
               '2032-03-05', '2032-03-06', '2032-03-07', '2032-03-08',
               '2032-03-09', '2032-03-10',
               ...
               '2032-07-23', '2032-07-24', '2032-07-25', '2032-07-26',
               '2032-07-27', '2032-07-28', '2032-07-29', '2032-07-30',
               '2032-07-31', '2032-08-01'],
              dtype='datetime64[ns]', length=154, freq='D')

In [40]:
# Keep users who joined inside the window spanned by `date_range`.
# Comparing against the window's first and last timestamps (both inclusive)
# rather than testing exact membership in every generated day means a
# 'Joined Date' that carries a time of day is still matched.
window_start = date_range.min()
window_end = date_range.max()
df[df['Joined Date'].between(window_start, window_end)]

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
2,3,anymike,25.0,2032-04-25,80,200
3,4,mcast,38.0,2032-06-21,420,940


## Sorting

In [44]:
# Ascending sort on Age, with the defaults spelled out: rows with a missing
# Age are placed last.
df.sort_values('Age', ascending=True, na_position='last')

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
2,3,anymike,25.0,2032-04-25,80,200
1,2,mike_sky,35.0,2032-02-15,230,720
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


In [46]:
# Most recent joiners first.
df.sort_values(by=['Joined Date'], ascending=[False])

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
4,5,charlie,,2032-09-15,310,500
3,4,mcast,38.0,2032-06-21,420,940
2,3,anymike,25.0,2032-04-25,80,200
1,2,mike_sky,35.0,2032-02-15,230,720
0,1,nicky_a,18.0,2032-01-01,150,500


In [49]:
# The three integer columns only; all rows are kept.
df.loc[:, ['User ID', 'Total Posts', 'Reputation']]

Unnamed: 0,User ID,Total Posts,Reputation
0,1,150,500
1,2,230,720
2,3,80,200
3,4,420,940
4,5,310,500


In [51]:
# With axis=1 the *columns* are reordered rather than the rows. `by=2` is the
# index label of the row whose values drive the ordering: row 2 holds
# User ID=3, Total Posts=80, Reputation=200, so descending order yields
# Reputation, Total Posts, User ID.
numeric_columns = df[['User ID', 'Total Posts', 'Reputation']]
numeric_columns.sort_values(by=2, axis=1, ascending=False)

Unnamed: 0,Reputation,Total Posts,User ID
0,500,150,1
1,720,230,2
2,200,80,3
3,940,420,4
4,500,310,5


In [52]:
# Fewest posts first (ascending is the default, stated explicitly here).
df.sort_values(by=['Total Posts'], ascending=True)

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
2,3,anymike,25.0,2032-04-25,80,200
0,1,nicky_a,18.0,2032-01-01,150,500
1,2,mike_sky,35.0,2032-02-15,230,720
4,5,charlie,,2032-09-15,310,500
3,4,mcast,38.0,2032-06-21,420,940
