In [1]:
from osgeo import ogr
import csv
import pandas as pd
import numpy as np

In [2]:
class Point(object):
    """Thin wrapper holding a single OGR point geometry."""

    def __init__(self, lat, lng):
        """Build the point from a latitude and a longitude, both in degrees."""
        geometry = ogr.Geometry(ogr.wkbPoint)
        # Longitude is passed first, latitude second.
        geometry.AddPoint(lng, lat)
        self.point = geometry

    def getOgr(self):
        """Return the wrapped OGR geometry."""
        return self.point

    @property
    def ogr(self):
        """The wrapped OGR geometry, exposed as a read-only attribute."""
        return self.point

class Country(object):
    """Wraps an OGR country shape. Not meant to be instantiated directly."""

    def __init__(self, shape):
        self.shape = shape

    def getIso(self):
        """Return the value of the shape's 'ISO2' field."""
        return self.shape.GetField('ISO2')

    @property
    def iso(self):
        """The shape's 'ISO2' field, exposed as a read-only attribute."""
        return self.shape.GetField('ISO2')

    def __str__(self):
        # The string form is the shape's 'NAME' field.
        return self.shape.GetField('NAME')

    def contains(self, point):
        """Return whether the shape's geometry contains the wrapped point."""
        geometry = self.shape.geometry()
        return geometry.Contains(point.ogr)

class CountryChecker(object):
    """Loads a country shape file and looks up which country contains a point."""

    def __init__(self, country_file):
        """
        Open the shapefile at ``country_file`` and keep its layer for lookups.

        Raises IOError when the shapefile cannot be opened.
        """
        driver = ogr.GetDriverByName('ESRI Shapefile')
        # The opened data source is stored on the instance next to its layer.
        self.countryFile = driver.Open(country_file)
        if self.countryFile is None:
            # A failed Open() would otherwise surface later as an opaque
            # AttributeError on GetLayer(); report the real problem instead.
            raise IOError("Could not open country shapefile: %r" % (country_file,))
        self.layer = self.countryFile.GetLayer()

    def getCountry(self, point):
        """
        Find the country for the given gps coordinates.

        ``point`` is a wrapper exposing the OGR geometry as ``point.ogr``.
        Returns a Country wrapping the first feature whose geometry contains
        the point, or None when no feature contains it.
        """
        # Walk the layer's features in reading order rather than assuming the
        # feature ids are exactly 0..count-1.
        self.layer.ResetReading()
        feature = self.layer.GetNextFeature()
        while feature is not None:
            geometry = feature.geometry()
            # Features without a geometry cannot contain anything; skip them.
            if geometry is not None and geometry.Contains(point.ogr):
                return Country(feature)
            feature = self.layer.GetNextFeature()
        # nothing found
        return None

In [3]:
# Build the country checker from the countries shapefile.
cc = CountryChecker(country_file='TM_WORLD_BORDERS-0.3.shp')

In [4]:
# Sanity check: look up the country for one (latitude, longitude) pair.
# The parenthesised form prints the same text under Python 2 and Python 3.
print(cc.getCountry(Point(-23.6890, 150.0994)))

Australia


In [111]:
# Read MODIS2018.csv into a DataFrame.
data = pd.read_csv(filepath_or_buffer='MODIS2018.csv')

In [112]:
# Preview only the first rows instead of rendering the whole frame.
data.head(10)

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,-18.2242,144.9556,327.1,1.9,1.4,2018-01-01,15,Terra,MODIS,45,6.2,304.7,20.0,D,0
1,-18.0564,144.6842,327.2,2.1,1.4,2018-01-01,15,Terra,MODIS,55,6.2,296.6,32.2,D,0
2,-18.0529,144.6651,340.4,2.1,1.4,2018-01-01,15,Terra,MODIS,88,6.2,297.1,83.9,D,0
3,-18.0558,144.6914,324.4,2.1,1.4,2018-01-01,15,Terra,MODIS,42,6.2,297.1,22.8,D,0
4,-18.0524,144.6723,343.9,2.1,1.4,2018-01-01,15,Terra,MODIS,90,6.2,297.6,102.1,D,0
5,-18.0489,144.6530,336.1,2.1,1.4,2018-01-01,15,Terra,MODIS,84,6.2,297.6,62.6,D,0
6,-18.0433,144.6938,328.3,2.1,1.4,2018-01-01,15,Terra,MODIS,63,6.2,300.2,32.9,D,0
7,-17.9494,144.7574,323.2,2.0,1.4,2018-01-01,15,Terra,MODIS,63,6.2,300.6,20.5,D,0
8,-17.9460,144.7383,325.2,2.0,1.4,2018-01-01,15,Terra,MODIS,69,6.2,300.6,26.2,D,0
9,-17.9161,144.6444,325.6,2.1,1.4,2018-01-01,15,Terra,MODIS,41,6.2,297.2,22.4,D,0


In [34]:
# Print the index and the row contents for the first two rows.
first_two = data.head(n=2)
for index, row in first_two.iterrows():
    print(index, row)

(0, latitude        -18.2242
longitude        144.956
brightness         327.1
scan                 1.9
track                1.4
acq_date      2018-01-01
acq_time              15
satellite          Terra
instrument         MODIS
confidence            45
version              6.2
bright_t31         304.7
frp                   20
daynight               D
type                   0
Name: 0, dtype: object)
(1, latitude        -18.0564
longitude        144.684
brightness         327.2
scan                 2.1
track                1.4
acq_date      2018-01-01
acq_time              15
satellite          Terra
instrument         MODIS
confidence            55
version              6.2
bright_t31         296.6
frp                 32.2
daynight               D
type                   0
Name: 1, dtype: object)


In [113]:
# Sample fixed-size runs of rows from different ranges of the dataframe so that
# different months are represented; use `data` directly for the full dataset.
SAMPLE_STARTS = (
    0, 400000, 800000, 1000000, 1200000, 1600000, 2000000,
    2400000, 2800000, 3200000, 3400000, 3600000, 3800000, 4200000,
)
SAMPLE_SIZE = 100
sample_positions = np.r_[
    tuple(slice(start, start + SAMPLE_SIZE) for start in SAMPLE_STARTS)
]
data1 = data.iloc[sample_positions, :].reset_index()
# The month is the two characters at positions 5-6 of the acq_date string.
data1['month'] = data1['acq_date'].str.slice(5, 7)

In [114]:
# Preview only the first rows of the sample instead of rendering the whole frame.
data1.head(10)

Unnamed: 0,index,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,month
0,0,-18.2242,144.9556,327.1,1.9,1.4,2018-01-01,15,Terra,MODIS,45,6.2,304.7,20.0,D,0,01
1,1,-18.0564,144.6842,327.2,2.1,1.4,2018-01-01,15,Terra,MODIS,55,6.2,296.6,32.2,D,0,01
2,2,-18.0529,144.6651,340.4,2.1,1.4,2018-01-01,15,Terra,MODIS,88,6.2,297.1,83.9,D,0,01
3,3,-18.0558,144.6914,324.4,2.1,1.4,2018-01-01,15,Terra,MODIS,42,6.2,297.1,22.8,D,0,01
4,4,-18.0524,144.6723,343.9,2.1,1.4,2018-01-01,15,Terra,MODIS,90,6.2,297.6,102.1,D,0,01
5,5,-18.0489,144.6530,336.1,2.1,1.4,2018-01-01,15,Terra,MODIS,84,6.2,297.6,62.6,D,0,01
6,6,-18.0433,144.6938,328.3,2.1,1.4,2018-01-01,15,Terra,MODIS,63,6.2,300.2,32.9,D,0,01
7,7,-17.9494,144.7574,323.2,2.0,1.4,2018-01-01,15,Terra,MODIS,63,6.2,300.6,20.5,D,0,01
8,8,-17.9460,144.7383,325.2,2.0,1.4,2018-01-01,15,Terra,MODIS,69,6.2,300.6,26.2,D,0,01
9,9,-17.9161,144.6444,325.6,2.1,1.4,2018-01-01,15,Terra,MODIS,41,6.2,297.2,22.4,D,0,01


In [115]:
# Look up the country for every sampled row from its latitude/longitude.
# Rows whose point lies in no country shape get None. The columns are built
# from the values directly, so the result does not depend on the frame's
# index labels matching row positions.
data1['country'] = [
    cc.getCountry(Point(lat, lng))
    for lat, lng in zip(data1['latitude'], data1['longitude'])
]
# Hemisphere from the latitude: strictly positive is 'N', everything else 'S'.
data1['hemisphere'] = ['N' if lat > 0 else 'S' for lat in data1['latitude']]

In [116]:
# Cast both derived columns to strings.
for column in ('country', 'hemisphere'):
    data1[column] = data1[column].astype(str)

In [117]:
# Number of sampled rows per month, as a two-column frame (month, count).
data1_month = data1.groupby(['month']).size().reset_index(name='count')

In [118]:
# Display the per-month counts.
data1_month

Unnamed: 0,month,count
0,1,100
1,2,100
2,3,100
3,4,100
4,5,100
5,6,100
6,7,100
7,8,100
8,9,200
9,10,100


In [119]:
# Number of sampled rows per (month, hemisphere) pair.
data1_month_hemisphere = (
    data1.groupby(['month', 'hemisphere']).size().reset_index(name='count')
)

In [120]:
# Display the per-month, per-hemisphere counts.
data1_month_hemisphere

Unnamed: 0,month,hemisphere,count
0,1,N,61
1,1,S,39
2,2,N,58
3,2,S,42
4,3,N,100
5,4,N,100
6,5,S,100
7,6,S,100
8,7,S,100
9,8,S,100


In [121]:
# Number of sampled rows per country.
data1_country = data1.groupby(['country']).size().reset_index(name='count')

In [122]:
# Display the per-country counts.
data1_country

Unnamed: 0,country,count
0,Angola,66
1,Australia,180
2,Belize,12
3,Bolivia,46
4,Botswana,3
5,Brazil,198
6,Bulgaria,1
7,Burma,51
8,Cameroon,8
9,Central African Republic,28


In [30]:
# Inspect the column dtypes of the sample frame.
data1.dtypes

index           int64
latitude      float64
longitude     float64
brightness    float64
scan          float64
track         float64
acq_date       object
acq_time        int64
satellite      object
instrument     object
confidence      int64
version       float64
bright_t31    float64
frp           float64
daynight       object
type            int64
month          object
country        object
hemisphere     object
dtype: object

In [123]:
# Write each aggregate frame to its own CSV file.
# Export of the full sample frame is left disabled:
#data1.to_csv('sample_data_2018.csv', sep=',', encoding='utf-8')
exports = [
    (data1_month, 'sample_data_bymonth_2018.csv'),
    (data1_month_hemisphere, 'sample_data_bymonth_hem_2018.csv'),
    (data1_country, 'sample_data_bycountry_2018.csv'),
]
for frame, csv_path in exports:
    frame.to_csv(csv_path, sep=',', encoding='utf-8')