In [106]:
import pandas as pd
import numpy as np

## Data Frames

In [107]:
# Column-oriented sample data: every key is a column label and every value
# holds that column's five entries, one per forum user.
forum_users = {
    'User ID': np.arange(1, 6),
    'Username': ['nicky_a', 'mike_sky', 'anymike', 'mcast', 'charlie'],
    # The trailing None is a missing age; pandas stores it as NaN, which is
    # why this column ends up as float64 instead of an integer dtype.
    'Age': [18, 35, 25, 38, None],
    # Parsed up front so the column is datetime-typed rather than plain strings.
    'Joined Date': pd.to_datetime([
        '2032-01-01',
        '2032-02-15',
        '2032-04-25',
        '2032-06-21',
        '2032-09-15',
    ]),
    'Total Posts': [150, 230, 80, 420, 310],
    'Reputation': [500, 720, 200, 940, 500],
}

# No index is passed, so rows receive the default 0..4 integer labels.
df = pd.DataFrame(data=forum_users)
df

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
1,2,mike_sky,35.0,2032-02-15,230,720
2,3,anymike,25.0,2032-04-25,80,200
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


In [108]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(5, 6)

In [109]:
# The column labels are held in a pandas Index object, not a plain list.
type(df.columns)

pandas.core.indexes.base.Index

In [110]:
# Column labels, in the same order as the keys of the source dict.
df.columns

Index(['User ID', 'Username', 'Age', 'Joined Date', 'Total Posts',
       'Reputation'],
      dtype='object')

In [111]:
# Convert the column Index into a plain Python list of label strings.
df.columns.tolist()

['User ID', 'Username', 'Age', 'Joined Date', 'Total Posts', 'Reputation']

In [112]:
# Row labels: no index was supplied when building df, so it has a default
# RangeIndex running from 0 up to (but not including) 5.
df.index

RangeIndex(start=0, stop=5, step=1)

In [113]:
# Materialize the row labels as a plain Python list.
df.index.tolist()

[0, 1, 2, 3, 4]

In [114]:
# Data type of each column. 'Age' is float64 rather than int64 because it
# contains a missing value.
df.dtypes

User ID                 int64
Username               object
Age                   float64
Joined Date    datetime64[ns]
Total Posts             int64
Reputation              int64
dtype: object

In [115]:
# The frame's data as a 2-D NumPy array. Because the columns have different
# types, the array falls back to dtype=object.
# NOTE(review): the pandas documentation recommends df.to_numpy() over
# .values for new code.
df.values

array([[1, 'nicky_a', 18.0, Timestamp('2032-01-01 00:00:00'), 150, 500],
       [2, 'mike_sky', 35.0, Timestamp('2032-02-15 00:00:00'), 230, 720],
       [3, 'anymike', 25.0, Timestamp('2032-04-25 00:00:00'), 80, 200],
       [4, 'mcast', 38.0, Timestamp('2032-06-21 00:00:00'), 420, 940],
       [5, 'charlie', nan, Timestamp('2032-09-15 00:00:00'), 310, 500]],
      dtype=object)

In [116]:
# Confirm that .values hands back a NumPy ndarray, not a pandas object.
type(df.values)

numpy.ndarray

In [117]:
# Positional [row, column] indexing on the array: row 1, column 1 is the
# username of the second user.
df.values[1, 1]

'mike_sky'

In [118]:
# Preview the first rows; with no argument head() shows up to 5 rows, which
# is the whole frame here.
df.head()

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
1,2,mike_sky,35.0,2032-02-15,230,720
2,3,anymike,25.0,2032-04-25,80,200
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


In [119]:
# Limit the preview to the first 3 rows.
df.head(3)

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,1,nicky_a,18.0,2032-01-01,150,500
1,2,mike_sky,35.0,2032-02-15,230,720
2,3,anymike,25.0,2032-04-25,80,200


In [120]:
# head() returns an ordinary DataFrame, so it can be chained like any other.
type(df.head(3))

pandas.core.frame.DataFrame

In [121]:
# Last 2 rows of the frame; the original row labels (3 and 4) are kept.
df.tail(2)

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
3,4,mcast,38.0,2032-06-21,420,940
4,5,charlie,,2032-09-15,310,500


In [122]:
# Summary statistics, rounded to 2 decimals. The text column 'Username' is
# left out of the summary, and 'Age' reports count 4 because one value is
# missing.
df.describe().round(2)

Unnamed: 0,User ID,Age,Joined Date,Total Posts,Reputation
count,5.0,4.0,5,5.0,5.0
mean,3.0,29.0,2032-04-28 00:00:00,238.0,572.0
min,1.0,18.0,2032-01-01 00:00:00,80.0,200.0
25%,2.0,23.25,2032-02-15 00:00:00,150.0,500.0
50%,3.0,30.0,2032-04-25 00:00:00,230.0,500.0
75%,4.0,35.75,2032-06-21 00:00:00,310.0,720.0
max,5.0,38.0,2032-09-15 00:00:00,420.0,940.0
std,1.58,9.2,,133.3,276.62


In [130]:
# Labels of the columns whose dtype is object; in this frame that is only
# 'Username'.
df.select_dtypes(include='object').columns

Index(['Username'], dtype='object')

In [124]:
# Element-wise missing-value mask: True where a cell is missing. Only the
# 'Age' of row 4 is flagged.
df.isna()

Unnamed: 0,User ID,Username,Age,Joined Date,Total Posts,Reputation
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,True,False,False,False


In [131]:
# Summing the boolean mask column by column counts the missing values in
# each column (True counts as 1).
df.isna().sum()

User ID        0
Username       0
Age            1
Joined Date    0
Total Posts    0
Reputation     0
dtype: int64

## Series

In [137]:
# Selecting a single column by its label returns that column as a Series,
# which keeps the column name and the row labels.
username_series = df['Username']
username_series

0     nicky_a
1    mike_sky
2     anymike
3       mcast
4     charlie
Name: Username, dtype: object

In [138]:
# A single column of a DataFrame is a pandas Series.
type(username_series)

pandas.core.series.Series

In [139]:
# The Series' data as a 1-D NumPy array; the strings are stored with
# dtype=object.
username_series.values

array(['nicky_a', 'mike_sky', 'anymike', 'mcast', 'charlie'], dtype=object)

In [140]:
# As with the DataFrame, .values on a Series is a NumPy ndarray.
type(username_series.values)

numpy.ndarray

In [141]:
# The Series carries the same 0..4 RangeIndex as the DataFrame it came from.
username_series.index

RangeIndex(start=0, stop=5, step=1)

In [151]:
# How often each distinct reputation value occurs, most frequent first
# (500 appears twice).
df['Reputation'].value_counts()

Reputation
500    2
720    1
200    1
940    1
Name: count, dtype: int64

In [152]:
# Same counts, ordered from least to most frequent.
df['Reputation'].value_counts(ascending=True)

Reputation
720    1
200    1
940    1
500    2
Name: count, dtype: int64

In [153]:
# Distinct reputation values in order of first appearance, returned as a
# NumPy array.
df['Reputation'].unique()

array([500, 720, 200, 940])

In [154]:
# Sort the usernames alphabetically; each value keeps its original row
# label, so the index is no longer in 0..4 order.
df['Username'].sort_values()

2     anymike
4     charlie
3       mcast
1    mike_sky
0     nicky_a
Name: Username, dtype: object