In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a small DataFrame in which every column holds a different dtype.
# The scalar entries (B, C, D, F) are repeated for each of the three rows.

dft = pd.DataFrame({
    'A': np.random.rand(3),
    'B': 1,
    'C': 'foo',
    'D': pd.Timestamp('20010102'),
    'E': pd.Series([1.0] * 3).astype('float32'),
    'F': False,
    'G': pd.Series([1] * 3, dtype='int8'),
})

dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.881113,1,foo,2001-01-02,1.0,False,1
1,0.658997,1,foo,2001-01-02,1.0,False,1
2,0.276585,1,foo,2001-01-02,1.0,False,1


In [8]:
# The `dtypes` attribute shows the dtype of every column at a glance:
# it is a Series indexed by column name whose values are the dtypes.

dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [14]:
# If a pandas object contains data of multiple dtypes IN A
# SINGLE COLUMN, the dtype of the column will be chosen
# to accommodate all of the data types (object is the
# most general).
# Here the ints are coerced to floats because the last element (6.)
# is a float; displaying the dtype makes the coercion visible.

test = pd.Series([1, 2, 3, 4, 5, 6.])
test.dtype

In [26]:
# String data mixed with numbers forces an ``object`` dtype;
# displaying the dtype confirms it.

test = pd.Series([1, 2, 3, 6., 'foo'])
test.dtype

In [27]:
# Count how many columns of each dtype the DataFrame has.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and
# removed in 1.0; counting the values of `.dtypes` replaces it.
# The dtypes are cast to str and the index is sorted so the result
# is ordered by dtype name, like the old method's output.

dft.dtypes.astype(str).value_counts().sort_index()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [28]:
# Build a 5x4 DataFrame of standard-normal samples with columns 'a'..'d'.

df = pd.DataFrame(
    data=np.random.randn(5, 4),
    columns=list('abcd'),
)
df

Unnamed: 0,a,b,c,d
0,-1.068047,-0.515719,2.455947,0.217893
1,-1.504756,0.906068,-0.375539,-1.729486
2,1.664417,-0.877663,-1.187976,0.259539
3,-0.175308,1.988579,-0.30083,0.971011
4,-0.01603,-0.072009,0.907375,-1.093754


In [39]:
# Use df.apply to take the square root of every value in df.
# NaN ("not a number") appears wherever the original value was
# negative, because a negative number has no real square root.

sq_df = df.apply(np.sqrt)
sq_df

Unnamed: 0,a,b,c,d
0,,,1.567146,0.46679
1,,0.951876,,
2,1.290123,,,0.50945
3,,1.41017,,0.985399
4,,,0.952563,


In [41]:
# Replace every NaN in sq_df with the mean of the matching column of the
# ORIGINAL df (not of sq_df), so a filled-in value can be negative.

column_means = df.apply(np.mean, axis=0)
df2 = sq_df.fillna(value=column_means)
df2

Unnamed: 0,a,b,c,d
0,-0.219945,0.285851,1.567146,0.46679
1,-0.219945,0.951876,0.299796,-0.274959
2,1.290123,0.285851,0.299796,0.50945
3,-0.219945,1.41017,0.299796,0.985399
4,-0.219945,0.285851,0.952563,-0.274959


In [30]:
# Find the mean of each column: with axis=0 every column is passed
# to np.mean, giving one value per column label.

df.apply(np.mean, axis=0)

a   -0.219945
b    0.285851
c    0.299796
d   -0.274959
dtype: float64

In [31]:
# Find the mean of each row: with axis=1 every row is passed
# to np.mean, giving one value per row label.

df.apply(np.mean, axis=1)

0    0.272519
1   -0.675928
2   -0.035421
3    0.620863
4   -0.068605
dtype: float64

In [55]:
# Draw 50 random integers from 0 to 6 inclusive; the upper bound
# handed to randint (7) is exclusive, so 7 itself never appears.

data = np.random.randint(low=0, high=7, size=50)
data

array([6, 1, 0, 6, 2, 6, 0, 4, 1, 4, 0, 2, 0, 5, 6, 6, 4, 2, 6, 0, 5, 1, 5,
       5, 1, 1, 6, 1, 5, 1, 5, 3, 1, 5, 5, 2, 3, 3, 5, 6, 4, 4, 4, 4, 2, 4,
       2, 1, 6, 5])

In [56]:
# Convert the NumPy array into a pandas Series so that Series
# methods can be used on it.

s = pd.Series(data)

In [58]:
# How many times does each number occur in the series?
# Series.value_counts() returns the counts, most frequent first.
# (The top-level pd.value_counts() is deprecated since pandas 2.1,
# so the method form is used here.)

s.value_counts()

5    10
6     9
1     9
4     8
2     6
0     5
3     3
dtype: int64