# Examples in pandas for converting data types

In [10]:
# Setup
import pandas as pd
import numpy as np
import time
from datetime import datetime

# Sample frame: every cell is deliberately stored as a string so the
# conversion examples further down have something to convert.
df = pd.DataFrame(
    data=[
        ["3.5", "2", "s1", "2017-08-01 00:00:00"],
        ["-6.7", "-3", "s2", "2017-08-01 00:00:00"],
    ],
    columns=['type_float', 'type_int', 'type_str', 'type_datetime'],
)
print(df)


  type_float type_int type_str        type_datetime
0        3.5        2       s1  2017-08-01 00:00:00
1       -6.7       -3       s2  2017-08-01 00:00:00


## Helper functions

In [0]:
# Timestamp layout used both to detect and to parse datetime strings.
STR_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

def is_datetime(s):
    """Return True if `s` is a string that parses with STR_DATE_FORMAT.

    Parameters
    ----------
    s : object
        Candidate value, typically a string such as '2017-08-01 00:00:00'.

    Returns
    -------
    bool
        True when `datetime.strptime(s, STR_DATE_FORMAT)` succeeds; False
        when the string does not match the format or `s` is not a string
        at all (e.g. None, NaN, a number).
    """
    try:
        datetime.strptime(s, STR_DATE_FORMAT)  # e.g. 2017-08-01 00:00:00
    except (ValueError, TypeError):
        # ValueError: string does not match the format.
        # TypeError: strptime was handed a non-string (None, float NaN, int, ...).
        return False
    return True
    
def is_int(s):
    """Return True if `int(s)` succeeds, False otherwise.

    Parameters
    ----------
    s : object
        Candidate value, typically a string such as '2' or '-3'.

    Returns
    -------
    bool
        True when `s` can be passed to `int()` without error. A string with
        a decimal point (e.g. '3.5') is not accepted by `int()` and yields
        False.
    """
    try:
        int(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparsable string or float NaN.
        # TypeError: unsupported type such as None or a list.
        # OverflowError: float infinity.
        return False
    return True
    
def is_float(s):
    """Return True if `float(s)` succeeds, False otherwise.

    Parameters
    ----------
    s : object
        Candidate value, typically a string such as '3.5' or '-6.7'.

    Returns
    -------
    bool
        True when `s` can be passed to `float()` without error. Integer-like
        strings such as '2' also return True because `float('2')` is valid.
    """
    try:
        float(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparsable string.
        # TypeError: unsupported type such as None or a list.
        # OverflowError: an int too large to be represented as a float.
        return False
    return True
      
def start_timing():
    """Print the current time (UTC, 'YYYY-MM-DD HH:MM:SS') and return it.

    Returns
    -------
    float
        The value of `time.time()` captured at the call, for later use with
        `end_timing`.
    """
    now = time.time()
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now))
    print(stamp)
    return now

def end_timing(start_time):
    """Print the current time (UTC) and the seconds elapsed since `start_time`.

    Parameters
    ----------
    start_time : float
        A `time.time()` reading, e.g. the value returned by `start_timing`.

    Returns
    -------
    float
        The `time.time()` reading taken at this call.
    """
    now = time.time()
    elapsed = now - start_time
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now))
    print(stamp)
    print("Total elapsed time: %g seconds" % elapsed)
    return now

## Convert dataframe column types, one column at a time

In [12]:
# Work on a copy so the all-strings source frame `df` stays untouched.
df2 = df.copy()
t0 = start_timing()
for i, c in enumerate(df2.columns):
    # The last row's value is sampled to decide the target type for the whole
    # column, so that row must be well formatted.
    v = df[c].iloc[-1]
    print('Before:', i, c, type(v))

    # Convert the whole column with a single vectorized pandas call rather
    # than applying the converter element by element. Values that fail to
    # parse become NaT/NaN because of errors='coerce'.
    if is_datetime(v):
        df2[c] = pd.to_datetime(df[c], format=STR_DATE_FORMAT, errors='coerce')
    elif is_int(v) or is_float(v):
        # Integer-like and float-like columns use the same conversion;
        # pd.to_numeric picks int64 or float64 based on the data.
        df2[c] = pd.to_numeric(df[c], errors='coerce')

    v2 = df2[c].iloc[0]
    print('After:', i, c, type(v2))

    # Reports time elapsed since the loop started (cumulative, not per column).
    end_timing(t0)
    print('---')
    

2018-07-29 19:20:06
Before: 0 type_float <class 'str'>
After: 0 type_float <class 'numpy.float64'>
2018-07-29 19:20:06
Total elapsed time: 0.0028441 seconds
---
Before: 1 type_int <class 'str'>
After: 1 type_int <class 'numpy.int64'>
2018-07-29 19:20:07
Total elapsed time: 0.00547266 seconds
---
Before: 2 type_str <class 'str'>
After: 2 type_str <class 'str'>
2018-07-29 19:20:07
Total elapsed time: 0.00799227 seconds
---
Before: 3 type_datetime <class 'str'>
After: 3 type_datetime <class 'pandas._libs.tslib.Timestamp'>
2018-07-29 19:20:07
Total elapsed time: 0.0100474 seconds
---


## Convert a pandas series of floats to a series of ints

In [18]:
# A float series with a missing value; NaN cannot be stored in a plain int
# series, so it is replaced with a sentinel (-1) before casting.
s = pd.Series([1.1, 2.2, 3.3, np.nan])
print("Before:", s, "---", sep="\n")

s = s.fillna(-1)
s = s.astype(int)  # the cast drops the fractional part (1.1 -> 1)
print("After:", s, sep="\n")

Before:
0    1.1
1    2.2
2    3.3
3    NaN
dtype: float64
---
After:
0    1
1    2
2    3
3   -1
dtype: int64
