In [1]:
import pandas
import datetime
import numpy
from scipy.optimize import curve_fit
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline

In [2]:
# Load the processed SAPS records from a space-delimited text file.
datFileName = "../data/processedSaps.txt"
sapsDataDF = pandas.read_csv(datFileName, sep=' ')
# add dst_bins: bucket every record by its dst_index value.  pandas.cut builds
# right-closed intervals by default, i.e. (-150, -75], ..., (-10, 10].
dstBins = [ -150, -75, -50, -25, -10, 10 ]
sapsDataDF = pandas.concat( [ sapsDataDF, \
                    pandas.cut( sapsDataDF["dst_index"], \
                               bins=dstBins ) ], axis=1 )
# Columns are renamed purely by position: the ten columns read from the file
# followed by the binned column appended above (which pandas.cut leaves named
# "dst_index", hence the rename to "dst_bin").
sapsDataDF.columns = [ "dateStr", "sapsLat", "sapsMLT", \
                      "sapsVel", "radId", "poesLat", "poesMLT", \
                      "dst_date", "dst_index", "time", "dst_bin" ]
# Pass `axis` by keyword: the positional form `drop(labels, 1)` was deprecated
# and is rejected by pandas >= 2.0, while the keyword works on every version.
sapsDataDF = sapsDataDF.drop(["radId", "poesLat", "poesMLT", "dst_date"], axis=1)
sapsDataDF.head()

Unnamed: 0,dateStr,sapsLat,sapsMLT,sapsVel,dst_index,time,dst_bin
0,20110107,56.5,17.7543,308.2077,-18.0,0,"(-25, -10]"
1,20110107,55.5,18.0147,224.1588,-18.0,0,"(-25, -10]"
2,20110107,56.5,17.8749,307.4328,-18.0,0,"(-25, -10]"
3,20110107,55.5,18.1324,222.4787,-18.0,0,"(-25, -10]"
4,20110107,56.5,17.9955,305.4201,-18.0,0,"(-25, -10]"


In [3]:
# calculate prob of occ by dst_bin, MLT, Lat
# Round MLT to the nearest whole number so records can be grouped per MLT value.
sapsDataDF["sapsMLTRounded"] = sapsDataDF["sapsMLT"].map(lambda x: round(x) )
# get a normalized form of MLT where
# if MLT >= 12: MLT = MLT - 24, else MLT = MLT
# (values from 12 upwards are mapped to negative numbers, making the axis
# continuous across midnight)
sapsDataDF['normMLT'] = [x-24 if x >= 12 else x for x in sapsDataDF['sapsMLTRounded']]
# Shift latitude by a fixed 57.5 offset so the values become small integers
# around zero (the sapsLat values shown in the data are on a x.5 grid).
sapsDataDF['normLAT'] = [x-57.5 for x in sapsDataDF['sapsLat']]
# Count the records in every (dst_bin, MLT, Lat) cell; normMLT / normLAT are
# functions of the other keys and are included only to carry them along.
dstGrps = sapsDataDF.groupby(["dst_bin", "sapsMLTRounded", "sapsLat", "normMLT", "normLAT"])
dstSapsMLTLatCountDF = pandas.DataFrame( dstGrps["sapsVel"].count() ).reset_index()
# Get max points at a given Lat, MLT, DstBin: the largest cell count per
# dst_bin, used below as the normalisation factor.
maxCntMLTLatDst = dstSapsMLTLatCountDF.groupby(["dst_bin"]).max().reset_index()
# `axis` is passed by keyword; the positional form is rejected by pandas >= 2.0.
maxCntMLTLatDst = maxCntMLTLatDst.drop(["sapsMLTRounded", "sapsLat", "normMLT", "normLAT"], axis=1)
maxCntMLTLatDst.columns = ["dst_bin", "maxCount"]
dstSapsMLTLatCountDF = pandas.merge( dstSapsMLTLatCountDF, maxCntMLTLatDst, \
                              on=["dst_bin"], how='inner')
# Positional rename: the "sapsVel" count column becomes "dataCount" and
# "sapsMLTRounded" becomes "sapsMLT".
dstSapsMLTLatCountDF.columns = ["dst_bin", "sapsMLT", "sapsLat", "normMLT", "normLAT", "dataCount", "maxCount"]
dstSapsMLTLatCountDF["MLT"] = dstSapsMLTLatCountDF["sapsMLT"].map(lambda x: str(int(x)) )
# Occurrence probability is relative to the busiest cell of the same dst_bin,
# so it equals 1 at that cell rather than summing to 1 over the bin.
dstSapsMLTLatCountDF["probOcc"] = dstSapsMLTLatCountDF["dataCount"]/dstSapsMLTLatCountDF["maxCount"]
dstSapsMLTLatCountDF.head()

Unnamed: 0,dst_bin,sapsMLT,sapsLat,normMLT,normLAT,dataCount,maxCount,MLT,probOcc
0,"(-150, -75]",0.0,51.5,0.0,-6.0,1,333,0,0.003003
1,"(-150, -75]",0.0,52.5,0.0,-5.0,22,333,0,0.066066
2,"(-150, -75]",0.0,53.5,0.0,-4.0,26,333,0,0.078078
3,"(-150, -75]",0.0,54.5,0.0,-3.0,36,333,0,0.108108
4,"(-150, -75]",0.0,55.5,0.0,-2.0,50,333,0,0.15015


In [4]:
# select the required dst_bin
# the bins are :
# ['(-150, -75]' '(-75, -50]' '(-50, -25]' '(-25, -10]' '(-10, 10]']
# NOTE: despite its name, `saps5075` holds whichever bin is selected here
# (currently "(-10, 10]"); the name is kept because later cells refer to it.
# The bin labels are compared as strings: depending on the pandas version,
# pandas.cut yields either string labels or Interval objects, and comparing
# Interval categories with a plain string silently matches nothing.
saps5075 = dstSapsMLTLatCountDF[ \
                    dstSapsMLTLatCountDF["dst_bin"].astype(str) == "(-10, 10]" ]

In [5]:
# Lay out a full integer (normLAT, normMLT) grid covering the extent of the
# selected bin, with latitude as the slow axis and MLT as the fast axis.
latSpan = range( int(saps5075["normLAT"].min()), int(saps5075["normLAT"].max()) + 1 )
mltSpan = range( int(saps5075["normMLT"].min()), int(saps5075["normMLT"].max()) + 1 )
latArr = [ latVal for latVal in latSpan for _mltVal in mltSpan ]
mltArr = [ mltVal for _latVal in latSpan for mltVal in mltSpan ]

# The frame is created with its placeholder columns first; "probSAPS", "MLT"
# and "Lat" stay empty, and "MLT" collides with the column of the same name in
# saps5075, which is why the merged frame shows MLT_x / MLT_y.
sapsModelDF = pandas.DataFrame(columns=["normMLT", "normLAT", "probSAPS", "MLT", "Lat"])
sapsModelDF["normMLT"] = mltArr
sapsModelDF["normLAT"] = latArr

# Outer join keeps every grid cell as well as every observed cell; grid cells
# without observations get a fixed floor probability of 0.02.
sapsModelDF = pandas.merge( sapsModelDF, saps5075, how="outer", on=["normMLT", "normLAT"] )
sapsModelDF["probOcc"] = sapsModelDF["probOcc"].fillna(0.02)
sapsModelDF.head()

Unnamed: 0,normMLT,normLAT,probSAPS,MLT_x,Lat,dst_bin,sapsMLT,sapsLat,dataCount,maxCount,MLT_y,probOcc
0,-9,-7,,,,,,,,,,0.02
1,-8,-7,,,,,,,,,,0.02
2,-7,-7,,,,,,,,,,0.02
3,-6,-7,,,,,,,,,,0.02
4,-5,-7,,,,,,,,,,0.02


In [6]:
def twoD_Gaussian(xy, amplitude, xo, yo, sigma_x, sigma_y):
    """Evaluate a rotated 2-D Gaussian with a fixed rotation angle.

    The signature follows the scipy.optimize.curve_fit convention: the
    independent variables come first, followed by the parameters to fit.

    Parameters
    ----------
    xy : sequence of two array-likes
        The ``(x, y)`` coordinates, passed as a single positional argument
        (a tuple of arrays or a 2 x N array).
    amplitude : float
        Peak value, reached at ``(xo, yo)``.
    xo, yo : float
        Centre of the Gaussian along x and y.
    sigma_x, sigma_y : float
        Widths along the two rotated axes. They enter only as squares, so
        their sign does not affect the result.

    Returns
    -------
    The Gaussian evaluated at every ``(x, y)`` pair, flattened to 1-D.
    """
    # Unpack here rather than in the signature: ``def f((x, y), ...)`` is a
    # SyntaxError on Python 3 (PEP 3113); this form runs on Python 2 and 3.
    x, y = xy
    # Rotation angle in radians, held constant (not a fit parameter).
    theta = 0.633
    xo = float(xo)
    yo = float(yo)
    # Coefficients of the quadratic form a*dx^2 + 2*b*dx*dy + c*dy^2.
    a = (numpy.cos(theta)**2)/(2*sigma_x**2) + (numpy.sin(theta)**2)/(2*sigma_y**2)
    b = -1*(numpy.sin(2*theta))/(4*sigma_x**2) + (numpy.sin(2*theta))/(4*sigma_y**2)
    c = (numpy.sin(theta)**2)/(2*sigma_x**2) + (numpy.cos(theta)**2)/(2*sigma_y**2)
    g = amplitude*numpy.exp( - (a*((x-xo)**2) + 2*b*(x-xo)*(y-yo)
                            + c*((y-yo)**2)))
    return g.ravel()

In [7]:
# Starting point for the optimiser, in the parameter order of twoD_Gaussian:
# (amplitude, xo, yo, sigma_x, sigma_y).
initGuess = (1,4,-1,2,2)
# x is normalised latitude and y is normalised MLT; probOcc is the surface
# being fitted.  popt2 holds the best-fit parameters, pcov2 their covariance.
popt2, pcov2 = curve_fit(twoD_Gaussian, (sapsModelDF['normLAT'],sapsModelDF['normMLT']), sapsModelDF['probOcc'],
                       p0=initGuess)
# print(...) with a single argument prints identically on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(popt2)
print(pcov2)

[ 0.83570584  4.5310208  -1.25487903  3.27245002  1.6968925 ]
[[  4.49940931e-04  -2.49617309e-04   1.27913351e-04  -1.17799172e-03
   -4.66872699e-04]
 [ -2.49617309e-04   8.05479602e-03  -3.92479200e-03   4.20034225e-03
    1.99506946e-04]
 [  1.27913351e-04  -3.92479200e-03   4.40847338e-03  -2.45360375e-03
    5.32491554e-05]
 [ -1.17799172e-03   4.20034225e-03  -2.45360375e-03   1.23986331e-02
    4.42010537e-05]
 [ -4.66872699e-04   1.99506946e-04   5.32491554e-05   4.42010537e-05
    1.95331308e-03]]


In [8]:
# Hand-collected Gaussian fit parameters, one entry per Dst bin, in the same
# order as dstBinArr.  The last entry of each list matches the fit printed
# above for the "(-10, 10]" bin; the others presumably come from re-running
# the fit with a different bin selected -- TODO confirm.
dstBinArr = [ "(-150, -75]", "(-75, -50]", "(-50, -25]", "(-25, -10]", "(-10, 10]" ]
# One representative dst_index value per bin.
dstVals = [ -115., -65., -38., -18., 0. ]
ampArr = [ 1.00, 0.988, 1.02, 0.88, 0.84 ]
xoArr = [ -1.17, 0.56, 2.19, 3.65, 4.53 ]

yoArr = [ -3.94, -3.47, -2.69, -1.64, -1.25 ]
sigxArr = [ 2.78, 2.87, 2.82, 2.79, 3.27 ]
# sigma_y enters twoD_Gaussian only as sigma_y**2, so the optimiser may return
# either sign.  The first entry was recorded as -1.75; it is stored as its
# magnitude so that all widths are comparable across bins.
sigyArr = [ 1.75, 1.57, 1.79, 1.58, 1.69 ]


In [9]:
# NOTE(review): ad-hoc floating-point modulo whose result is only displayed;
# nothing else visible in the notebook uses it.  Looks like leftover scratch
# work -- confirm its purpose or remove the cell.
9.57599617e+06 % 6.3

2.4700000001955003