In [1]:
import pandas
import datetime
import numpy
from scipy.optimize import curve_fit
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline

In [2]:
datFileName = "../data/processedSaps.txt"
sapsDataDF = pandas.read_csv(datFileName, sep=' ')
# add dst_bins
dstBins = [ -150, -75, -50, -25, -10, 10 ]
sapsDataDF = pandas.concat( [ sapsDataDF, \
                    pandas.cut( sapsDataDF["dst_index"], \
                               bins=dstBins ) ], axis=1 )
sapsDataDF.columns = [ "dateStr", "sapsLat", "sapsMLT", \
                      "sapsVel", "radId", "poesLat", "poesMLT", \
                      "dst_date", "dst_index", "time", "dst_bin" ]
sapsDataDF = sapsDataDF.drop(["radId", "poesLat", "poesMLT", "dst_date"], 1)
sapsDataDF.head()

Unnamed: 0,dateStr,sapsLat,sapsMLT,sapsVel,dst_index,time,dst_bin
0,20110107,56.5,17.7543,308.2077,-18.0,0,"(-25, -10]"
1,20110107,55.5,18.0147,224.1588,-18.0,0,"(-25, -10]"
2,20110107,56.5,17.8749,307.4328,-18.0,0,"(-25, -10]"
3,20110107,55.5,18.1324,222.4787,-18.0,0,"(-25, -10]"
4,20110107,56.5,17.9955,305.4201,-18.0,0,"(-25, -10]"


In [3]:
# calculate prob of occ by dst_bin, MLT, Lat
sapsDataDF["sapsMLTRounded"] = sapsDataDF["sapsMLT"].map(lambda x: round(x) )
# get a normalized form of MLT where 
# if MLT > 12: MLT = MLT - 24, else MLT = MLT
sapsDataDF['normMLT'] = [x-24 if x >= 12 else x for x in sapsDataDF['sapsMLTRounded']]
sapsDataDF['normLAT'] = [x-57.5 for x in sapsDataDF['sapsLat']]
# Get max points at a given Lat, MLT, DstBin
dstGrps = sapsDataDF.groupby(["dst_index", "sapsMLTRounded", "sapsLat", "normMLT", "normLAT"])
dstSapsMLTLatCountDF = pandas.DataFrame( dstGrps["sapsVel"].count() ).reset_index()
maxCntMLTLatDst = dstSapsMLTLatCountDF.groupby(["dst_index"]).max().reset_index()
maxCntMLTLatDst = maxCntMLTLatDst.drop(["sapsMLTRounded", "sapsLat", "normMLT", "normLAT"], 1)
maxCntMLTLatDst.columns = ["dst_index", "maxCount"]
dstSapsMLTLatCountDF = pandas.merge( dstSapsMLTLatCountDF, maxCntMLTLatDst, \
                              on=["dst_index"], how='inner')
dstSapsMLTLatCountDF.columns = ["dst_index", "sapsMLT", "sapsLat", "normMLT", "normLAT", "dataCount", "maxCount"]
dstSapsMLTLatCountDF["MLT"] = dstSapsMLTLatCountDF["sapsMLT"].map(lambda x: str(int(x)) )
dstSapsMLTLatCountDF["probOcc"] = dstSapsMLTLatCountDF["dataCount"]/dstSapsMLTLatCountDF["maxCount"]
dstSapsMLTLatCountDF.head()

Unnamed: 0,dst_index,sapsMLT,sapsLat,normMLT,normLAT,dataCount,maxCount,MLT,probOcc
0,-147.0,17.0,53.5,-7.0,-4.0,1,14,17,0.071429
1,-147.0,17.0,54.5,-7.0,-3.0,1,14,17,0.071429
2,-147.0,17.0,55.5,-7.0,-2.0,2,14,17,0.142857
3,-147.0,17.0,56.5,-7.0,-1.0,4,14,17,0.285714
4,-147.0,18.0,52.5,-6.0,-5.0,4,14,18,0.285714


In [4]:
sapsModelDF = pandas.DataFrame(columns=["normMLT", "normLAT", "probSAPS", "MLT", "Lat"])
latArr = []
mltArr = []
dstArr = []
for x in range( int(dstSapsMLTLatCountDF["normLAT"].min()), int(dstSapsMLTLatCountDF["normLAT"].max()) + 1 ):
    for y in range( int(dstSapsMLTLatCountDF["normMLT"].min()), int(dstSapsMLTLatCountDF["normMLT"].max()) + 1 ):
        for z in range( int(dstSapsMLTLatCountDF["dst_index"].min()), int(dstSapsMLTLatCountDF["dst_index"].max()) + 1 ):
            latArr.append(x)
            mltArr.append(y)
            dstArr.append(z)
sapsModelDF["normMLT"] = mltArr
sapsModelDF["normLAT"] = latArr
sapsModelDF["dst_index"] = dstArr
sapsModelDF = pandas.merge( sapsModelDF, dstSapsMLTLatCountDF, on=["normMLT", "normLAT", "dst_index"], how="outer" )
sapsModelDF["probOcc"] = sapsModelDF["probOcc"].fillna(0.02)
# sapsModelDF = sapsModelDF[ ["normMLT", "normLAT", "probOcc"] ]
sapsModelDF.head()

Unnamed: 0,normMLT,normLAT,probSAPS,MLT_x,Lat,dst_index,sapsMLT,sapsLat,dataCount,maxCount,MLT_y,probOcc
0,-12,-7,,,,-147,,,,,,0.02
1,-12,-7,,,,-146,,,,,,0.02
2,-12,-7,,,,-145,,,,,,0.02
3,-12,-7,,,,-144,,,,,,0.02
4,-12,-7,,,,-143,,,,,,0.02


In [7]:
def saps_fit_func((normLat, normMLT, dst), a_sx, b_sx, a_sy, b_sy, a_xo, b_xo, a_yo, b_yo, a_o, b_o, theta):
    
    sigma_x = a_sx + b_sx * dst
    sigma_y = a_sy + b_sy * dst
    xo = a_xo + b_xo * dst
    yo = a_yo + b_yo * dst
    amplitude = a_o + b_o * dst    
    
    a = (numpy.cos(theta)**2)/(2*sigma_x**2) + (numpy.sin(theta)**2)/(2*sigma_y**2)
    b = -(numpy.sin(2*theta))/(4*sigma_x**2) + (numpy.sin(2*theta))/(4*sigma_y**2)
    c = (numpy.sin(theta)**2)/(2*sigma_x**2) + (numpy.cos(theta)**2)/(2*sigma_y**2)
    g = amplitude*numpy.exp( - (a*((x-xo)**2) + 2*b*(x-xo)*(y-yo) 
                            + c*((y-yo)**2)))
    return g#.ravel()

In [20]:
initGuess = (3,0.001,2,0.001,4,0.05,-0.5,0.05,1,0.001,1)
popt2, pcov2 = curve_fit(saps_fit_func, (sapsModelDF['normLAT'].T,sapsModelDF['normMLT'].T,sapsModelDF['dst_index'].T), sapsModelDF['probOcc'],
                       p0=initGuess)
print popt2

[  2.79684231e+00  -1.51771272e-02   2.30224111e+00   6.72404226e-03
   5.09976039e-01   5.26796103e-02   4.85988743e+00  -1.15991467e-01
   1.71646074e+01  -2.87004336e-02   7.28840775e-01]


In [14]:
sapsModelDF.head()

Unnamed: 0,normMLT,normLAT,probSAPS,MLT_x,Lat,dst_index,sapsMLT,sapsLat,dataCount,maxCount,MLT_y,probOcc
0,-12,-7,,,,-147,,,,,,0.02
1,-12,-7,,,,-146,,,,,,0.02
2,-12,-7,,,,-145,,,,,,0.02
3,-12,-7,,,,-144,,,,,,0.02
4,-12,-7,,,,-143,,,,,,0.02
