In [1]:
import pandas as pd

from datetime import datetime
import time

import sys
sys.path.append('../../')

from pygolfdata.data import shotlink

pd.options.display.max_rows = 200

In [2]:
def mem_usage(pandas_obj):
    """Report the deep memory footprint of a pandas object as a megabyte string.

    Parameters
    ----------
    pandas_obj : pd.DataFrame or pd.Series
        Object to measure. Anything that is not a DataFrame is treated as a
        Series, i.e. its ``memory_usage(deep=True)`` result is used as-is.

    Returns
    -------
    str
        The size in megabytes (bytes / 1024**2) with two decimals,
        followed by ``" MB"``.
    """
    reported = pandas_obj.memory_usage(deep=True)
    # A DataFrame reports several byte counts that must be added together;
    # for anything else the reported value is already the total.
    total_bytes = reported.sum() if isinstance(pandas_obj, pd.DataFrame) else reported
    megabytes = total_bytes / 1024 ** 2
    return "{:03.2f} MB".format(megabytes)

In [3]:
# Load the combined shots + weather table through the project's shotlink loader
# and display its (rows, columns) shape.
# NOTE(review): the path is relative to the notebook's working directory, so
# this cell only runs from the directory it was authored in.
# NOTE(review): the file name says 2016_2016, yet the recorded output of the
# d['Year'].unique() cell lists 2012-2016 — confirm which file the saved
# outputs actually came from.
d = shotlink.get_combined_data_from_file('../../../golf_course_project_data/combined_shots_and_weather_2016_2016.csv')
d.shape

(1141966, 65)

In [4]:
mem_usage(d)

'604.37 MB'

In [5]:
d.dtypes

TourCode                         category
TourDescription                  category
Year                               uint16
TournamentNum                      uint16
PlayerNum                          uint16
CourseNum                          uint16
PermanentTournamentNum             uint16
PlayerFirstName                  category
PlayerLastName                   category
Round                               uint8
TournamentName                   category
CourseName_shots                   object
Hole                                uint8
HoleScore                         float32
ParValue                            uint8
Yardage                            uint16
Shot                                uint8
ShotType                         category
NumStrokes                          uint8
FromLocationScorer               category
FromLocationEnhanced               object
ToLocationScorer                 category
ToLocationEnhanced               category
Distance                          

In [9]:
str(d['WeatherDateAndHour'].dtype)

'datetime64[ns]'

In [12]:
str(d['Shot'].dtype)

'uint8'

In [7]:
d['TimeDifference']

0            0 days 00:20:00.000000000
1            0 days 00:21:00.000000000
2            0 days 00:21:00.000000000
3            0 days 00:21:00.000000000
4            0 days 00:22:00.000000000
5            0 days 00:22:00.000000000
6            0 days 00:26:00.000000000
7            0 days 00:26:00.000000000
8            0 days 00:26:00.000000000
9            0 days 00:27:00.000000000
10           0 days 00:28:00.000000000
11           0 days 00:28:00.000000000
12           0 days 00:30:00.000000000
13           0 days 00:30:00.000000000
14           0 days 00:30:00.000000000
15         -1 days +23:31:00.000000000
16         -1 days +23:31:00.000000000
17         -1 days +23:31:00.000000000
18         -1 days +23:31:00.000000000
19         -1 days +23:31:00.000000000
20         -1 days +23:32:00.000000000
21         -1 days +23:32:00.000000000
22         -1 days +23:32:00.000000000
23         -1 days +23:32:00.000000000
24         -1 days +23:33:00.000000000
25         -1 days +23:33

In [20]:
# Range check on the Time column (smallest and largest value as a tuple).
# NOTE(review): the recorded (0, 2055) looks like HHMM-encoded clock times
# rather than a count of minutes or seconds — confirm against the data source.
d['Time'].min(), d['Time'].max()

(0, 2055)

In [21]:
# List the distinct seasons present in the loaded frame.
# NOTE(review): the recorded output shows dtype=uint64, while both d.dtypes
# listings in this notebook show Year as uint16 — the saved outputs appear to
# come from different kernel sessions. Restart the kernel and run all cells in
# order before trusting any output here.
d['Year'].unique()

array([2012, 2013, 2014, 2015, 2016], dtype=uint64)

In [22]:
# Preview the first three rows of the loaded frame.
d.head(3)

Unnamed: 0,TourCode,TourDescription,Year,TournamentNum,PlayerNum,CourseNum,PermanentTournamentNum,PlayerFirstName,PlayerLastName,Round,...,Humidity,Visibility,WindBearing,WindGust,WindSpeed,PrecipitationIntensity,PrecipitationType,CourseName_weather,WeatherDateAndHour,TimeDifference
0,R,PGA TOUR,2012,10,23800,656,16,Bryce,Molder,1,...,0.54,10.0,41.0,,9.41,0.0,,Plantation Course at Kapalua,2012-01-06 11:00:00,-1 days +23:35:00.000000000
1,R,PGA TOUR,2012,10,1116,656,16,Michael,Bradley,1,...,0.54,10.0,41.0,,9.41,0.0,,Plantation Course at Kapalua,2012-01-06 11:00:00,-1 days +23:36:00.000000000
2,R,PGA TOUR,2012,10,23800,656,16,Bryce,Molder,1,...,0.54,10.0,41.0,,9.41,0.0,,Plantation Course at Kapalua,2012-01-06 11:00:00,-1 days +23:41:00.000000000


In [24]:
# Pick one row by integer position and display all of its columns.
# `index` is kept as a variable because a later cell reuses it to read the
# same row's WeatherDateAndHour value.
index = 432843
d.iloc[index]

TourCode                                           R
TourDescription                             PGA TOUR
Year                                            2012
TournamentNum                                    180
PlayerNum                                      29479
CourseNum                                        770
PermanentTournamentNum                            41
PlayerFirstName                                Scott
PlayerLastName                                 Brown
Round                                              2
TournamentName                     Valero Texas Open
CourseName_shots                     TPC San Antonio
Hole                                               9
HoleScore                                          6
ParValue                                           4
Yardage                                          474
Shot                                               4
ShotType                                           S
NumStrokes                                    

In [43]:
d['WindSpeed'].value_counts()

6.15     15488
4.51     14184
5.39     13449
3.95     11706
7.20     11276
6.40     11163
5.70     10972
5.26     10867
5.21     10817
6.48     10778
4.53     10712
4.70     10666
6.51     10548
4.49     10211
4.54     10186
5.86     10173
4.77     10147
4.78     10092
6.00     10080
7.71     10063
7.10      9988
7.76      9930
6.07      9918
7.89      9758
4.35      9650
7.25      9519
4.84      9497
4.36      9481
7.42      9264
7.41      9247
4.30      9233
8.30      9176
5.87      8984
6.80      8971
5.61      8956
4.96      8871
3.12      8850
6.89      8842
5.25      8818
7.33      8792
7.19      8722
8.38      8720
4.72      8710
4.50      8704
7.84      8693
5.73      8681
6.06      8668
7.07      8667
7.00      8615
3.81      8609
8.46      8562
7.53      8545
6.35      8507
3.69      8378
2.27      8347
4.59      8286
6.45      8280
4.71      8202
8.95      8181
4.56      8177
7.56      8151
3.37      8141
5.43      8119
4.08      8052
8.86      8031
7.96      8022
5.77      

In [30]:
d.dtypes

TourCode                   category
TourDescription            category
Year                         uint16
TournamentNum                uint16
PlayerNum                    uint16
CourseNum                    uint16
PermanentTournamentNum       uint16
PlayerFirstName            category
PlayerLastName             category
Round                         uint8
TournamentName             category
CourseName_shots             object
Hole                          uint8
HoleScore                   float32
ParValue                      uint8
Yardage                      uint16
Shot                          uint8
ShotType                   category
NumStrokes                    uint8
FromLocationScorer         category
FromLocationEnhanced         object
ToLocationScorer           category
ToLocationEnhanced         category
Distance                     uint16
DistanceToPin                uint16
InTheHoleFlag              category
AroundTheGreenFlag         category
FirstPuttFlag              c

In [35]:
d.columns

Index(['TourCode', 'TourDescription', 'Year', 'TournamentNum', 'PlayerNum',
       'CourseNum', 'PermanentTournamentNum', 'PlayerFirstName',
       'PlayerLastName', 'Round', 'TournamentName', 'CourseName_shots', 'Hole',
       'HoleScore', 'ParValue', 'Yardage', 'Shot', 'ShotType', 'NumStrokes',
       'FromLocationScorer', 'FromLocationEnhanced', 'ToLocationScorer',
       'ToLocationEnhanced', 'Distance', 'DistanceToPin', 'InTheHoleFlag',
       'AroundTheGreenFlag', 'FirstPuttFlag', 'DistanceToHoleAfterShot',
       'Time', 'Lie', 'Elevation', 'Slope', 'XCoordinate', 'YCoordinate',
       'ZCoordinate', 'DistanceFromCenter', 'DistanceFromEdge', 'Date_shots',
       'LeftRight', 'StrokesGainedBaseline', 'StrokesGainedCategory',
       'RecoveryShot', 'AMWindSpd', 'PMWindSpd', 'AMWindDir', 'PMWindDir',
       'ShotDateAndTime', 'PlayerName', 'Date_weather', 'Hour', 'Latitude',
       'Longitude', 'Summary', 'DegreesFahrenheit', 'Humidity', 'Visibility',
       'WindBearing', 'WindGus

In [34]:
# Scratch check of how to extend the loader's dtype mapping: work on a copy so
# that shotlink.shot_dtypes itself is not modified, merge extra entries into
# the copy with update(), then display the result.
# NOTE(review): 'foo' and 'sky' look like placeholder keys — presumably a dry
# run for merging real weather-column dtypes; replace or delete before sharing.
combined_dtypes = shotlink.shot_dtypes.copy()
combined_dtypes.update({'foo':'bar', 'sky':'baz'})
combined_dtypes

OrderedDict([('TourCode', 'category'),
             ('TourDescription', 'category'),
             ('Year', numpy.uint16),
             ('TournamentNum', numpy.uint16),
             ('PlayerNum', numpy.uint16),
             ('CourseNum', numpy.uint16),
             ('PermanentTournamentNum', numpy.uint16),
             ('PlayerFirstName', 'category'),
             ('PlayerLastName', 'category'),
             ('Round', numpy.uint8),
             ('TournamentName', 'category'),
             ('CourseName', 'category'),
             ('Hole', numpy.uint8),
             ('HoleScore', numpy.float32),
             ('ParValue', numpy.uint8),
             ('Yardage', numpy.uint16),
             ('Shot', numpy.uint8),
             ('ShotType', 'category'),
             ('NumStrokes', numpy.uint8),
             ('FromLocationScorer', 'category'),
             ('FromLocationEnhanced', object),
             ('ToLocationScorer', 'category'),
             ('ToLocationEnhanced', 'category'),
           

In [25]:
# Show the WeatherDateAndHour value of the row selected by `index` as a
# pandas Timestamp. Requires `index` to be defined already (it is assigned in
# the d.iloc[index] cell).
pd.to_datetime(d.iloc[index]['WeatherDateAndHour'])

Timestamp('2012-04-21 08:00:00')

In [26]:
# Epoch seconds for 11:00 on 2012-01-06, US Pacific wall-clock time.
# The zone is attached explicitly with an IANA name instead of a trailing
# "PDT" abbreviation in the string: an abbreviation such as PDT is only
# honoured when it happens to match the machine's own timezone names, so the
# previous form gave different results on different machines (and pandas now
# deprecates that parsing). 2012-01-06 falls in standard time, so the zone
# resolves to UTC-8, which reproduces the recorded 1325876400.0.
pd.to_datetime('2012-01-06 11:00:00').tz_localize('America/Los_Angeles').timestamp()

1325876400.0

In [27]:
# Format epoch seconds as a wall-clock string. time.localtime() converts with
# the local timezone of the machine running the kernel, so the displayed value
# depends on where this cell is executed.
# 1325847600 + 28800 = 1325876400, the number shown by the .timestamp() cell.
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(1325847600 + 28800))

'2012-01-06 11:00:00'

In [42]:
# 60 s/min * 8 h * 60 min/h = 28800: eight hours expressed in seconds.
# NOTE(review): presumably the UTC-8 offset used in the epoch checks — confirm.
60*8*60

28800

In [44]:
# 1325847600 is 2012-01-06 11:00:00 UTC in epoch seconds; adding 28800 s (8 h)
# gives 1325876400, matching the value shown by the .timestamp() cell.
1325847600 + 28800

1325876400

In [2]:
# Baseline load with plain pd.read_csv and no dtype specification, so pandas
# infers every column type; the dtypes listing recorded for this frame shows
# int64 / float64 / object columns and mem_usage reports '26329.10 MB'.
# NOTE(review): this rebinds `d`, which earlier cells bound to the frame
# returned by the shotlink loader, and the execution count restarts at 2 —
# these cells were recorded in a separate kernel session.
# NOTE(review): the stray "interactivity=..., result=result)" output line looks
# like the tail of a warning emitted during the read — confirm and, if it is a
# mixed-dtype warning, pass explicit dtypes.
d = pd.read_csv('../../pygolfdata/data/combined_shots_and_weather_2008-2017.csv')
d.shape

  interactivity=interactivity, compiler=compiler, result=result)


(11365769, 64)

In [4]:
mem_usage(d)

'26329.10 MB'

In [5]:
d.dtypes

TourCode                    object
TourDescription             object
Year                         int64
TournamentNum                int64
PlayerNum                    int64
CourseNum                    int64
PermanentTournamentNum       int64
PlayerFirstName             object
PlayerLastName              object
Round                        int64
TournamentName              object
CourseName_shots            object
Hole                         int64
HoleScore                  float64
ParValue                     int64
Yardage                      int64
Shot                         int64
ShotType                    object
NumStrokes                   int64
FromLocationScorer          object
FromLocationEnhanced        object
ToLocationScorer            object
ToLocationEnhanced          object
Distance                     int64
DistanceToPin                int64
InTheHoleFlag               object
AroundTheGreenFlag          object
FirstPuttFlag               object
DistanceToHoleAfterS

In [1]:
# NOTE(review): recorded with execution count 1 and a NameError — this cell was
# run on a fresh kernel before any cell that assigns `d`. Run a load cell
# first, or delete this cell so the notebook survives Restart & Run All.
d[:3]

NameError: name 'd' is not defined