In [12]:
import pandas as pd

In [13]:
df = pd.read_csv("AAPL.csv")
df.columns

Index(['Unnamed: 0', 'symbol', 'date', 'close', 'high', 'low', 'open',
       'volume', 'adjClose', 'adjHigh', 'adjLow', 'adjOpen', 'adjVolume',
       'divCash', 'splitFactor'],
      dtype='object')

In [14]:
cols = ['date', 'high', 'low', 'open', 'close']

apple_stocks = pd.read_csv("AAPL.csv", usecols=cols)
apple_stocks

Unnamed: 0,date,close,high,low,open
0,2015-05-27 00:00:00+00:00,132.045,132.260,130.0500,130.34
1,2015-05-28 00:00:00+00:00,131.780,131.950,131.1000,131.86
2,2015-05-29 00:00:00+00:00,130.280,131.450,129.9000,131.23
3,2015-06-01 00:00:00+00:00,130.535,131.390,130.0500,131.20
4,2015-06-02 00:00:00+00:00,129.960,130.655,129.3200,129.86
...,...,...,...,...,...
1253,2020-05-18 00:00:00+00:00,314.960,316.500,310.3241,313.17
1254,2020-05-19 00:00:00+00:00,313.140,318.520,313.0100,315.03
1255,2020-05-20 00:00:00+00:00,319.230,319.520,316.2000,316.68
1256,2020-05-21 00:00:00+00:00,316.850,320.890,315.8700,318.66


Reading datetime objects from files

In [15]:
apple_stocks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1258 non-null   object 
 1   close   1258 non-null   float64
 2   high    1258 non-null   float64
 3   low     1258 non-null   float64
 4   open    1258 non-null   float64
dtypes: float64(4), object(1)
memory usage: 49.3+ KB


In [16]:
# setting out index to date
apple_stocks.set_index('date', inplace=True)

In [17]:
apple_stocks.index

Index(['2015-05-27 00:00:00+00:00', '2015-05-28 00:00:00+00:00',
       '2015-05-29 00:00:00+00:00', '2015-06-01 00:00:00+00:00',
       '2015-06-02 00:00:00+00:00', '2015-06-03 00:00:00+00:00',
       '2015-06-04 00:00:00+00:00', '2015-06-05 00:00:00+00:00',
       '2015-06-08 00:00:00+00:00', '2015-06-09 00:00:00+00:00',
       ...
       '2020-05-11 00:00:00+00:00', '2020-05-12 00:00:00+00:00',
       '2020-05-13 00:00:00+00:00', '2020-05-14 00:00:00+00:00',
       '2020-05-15 00:00:00+00:00', '2020-05-18 00:00:00+00:00',
       '2020-05-19 00:00:00+00:00', '2020-05-20 00:00:00+00:00',
       '2020-05-21 00:00:00+00:00', '2020-05-22 00:00:00+00:00'],
      dtype='object', name='date', length=1258)

In [18]:
# converting into Datetime object
pd.to_datetime(apple_stocks.index)

DatetimeIndex(['2015-05-27 00:00:00+00:00', '2015-05-28 00:00:00+00:00',
               '2015-05-29 00:00:00+00:00', '2015-06-01 00:00:00+00:00',
               '2015-06-02 00:00:00+00:00', '2015-06-03 00:00:00+00:00',
               '2015-06-04 00:00:00+00:00', '2015-06-05 00:00:00+00:00',
               '2015-06-08 00:00:00+00:00', '2015-06-09 00:00:00+00:00',
               ...
               '2020-05-11 00:00:00+00:00', '2020-05-12 00:00:00+00:00',
               '2020-05-13 00:00:00+00:00', '2020-05-14 00:00:00+00:00',
               '2020-05-15 00:00:00+00:00', '2020-05-18 00:00:00+00:00',
               '2020-05-19 00:00:00+00:00', '2020-05-20 00:00:00+00:00',
               '2020-05-21 00:00:00+00:00', '2020-05-22 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='date', length=1258, freq=None)

We can also directly do that using `parse_dates` when reading csv

In [19]:
apple_stocks = pd.read_csv(
    "AAPL.csv", 
    usecols=cols,
    parse_dates=['date'],
    index_col='date')

apple_stocks.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1258 entries, 2015-05-27 00:00:00+00:00 to 2020-05-22 00:00:00+00:00
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   close   1258 non-null   float64
 1   high    1258 non-null   float64
 2   low     1258 non-null   float64
 3   open    1258 non-null   float64
dtypes: float64(4)
memory usage: 49.1 KB


Now we can use them as datetime objects

In [20]:
days = apple_stocks.index.day_name()
days

Index(['Wednesday', 'Thursday', 'Friday', 'Monday', 'Tuesday', 'Wednesday',
       'Thursday', 'Friday', 'Monday', 'Tuesday',
       ...
       'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
      dtype='object', name='date', length=1258)

In [21]:
# adding date name columns
apple_stocks.insert(0, 'day', days)
apple_stocks

Unnamed: 0_level_0,day,close,high,low,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-05-27 00:00:00+00:00,Wednesday,132.045,132.260,130.0500,130.34
2015-05-28 00:00:00+00:00,Thursday,131.780,131.950,131.1000,131.86
2015-05-29 00:00:00+00:00,Friday,130.280,131.450,129.9000,131.23
2015-06-01 00:00:00+00:00,Monday,130.535,131.390,130.0500,131.20
2015-06-02 00:00:00+00:00,Tuesday,129.960,130.655,129.3200,129.86
...,...,...,...,...,...
2020-05-18 00:00:00+00:00,Monday,314.960,316.500,310.3241,313.17
2020-05-19 00:00:00+00:00,Tuesday,313.140,318.520,313.0100,315.03
2020-05-20 00:00:00+00:00,Wednesday,319.230,319.520,316.2000,316.68
2020-05-21 00:00:00+00:00,Thursday,316.850,320.890,315.8700,318.66


If we read the csv without parsing the date, it will end up as regular object

In [22]:
apple_stocks = pd.read_csv(
    "AAPL.csv", 
    usecols=cols,
    )
apple_stocks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1258 non-null   object 
 1   close   1258 non-null   float64
 2   high    1258 non-null   float64
 3   low     1258 non-null   float64
 4   open    1258 non-null   float64
dtypes: float64(4), object(1)
memory usage: 49.3+ KB


Let's create a new date format that is not clean, so that we can try convert it to a valid Timestamp

In [23]:
date = pd.to_datetime(apple_stocks['date'])
new_format = date.dt.strftime('&d.%m.%Y_%H:%M_%p')
new_format

0       &d.05.2015_00:00_AM
1       &d.05.2015_00:00_AM
2       &d.05.2015_00:00_AM
3       &d.06.2015_00:00_AM
4       &d.06.2015_00:00_AM
               ...         
1253    &d.05.2020_00:00_AM
1254    &d.05.2020_00:00_AM
1255    &d.05.2020_00:00_AM
1256    &d.05.2020_00:00_AM
1257    &d.05.2020_00:00_AM
Name: date, Length: 1258, dtype: object