# SigmaClip

In [1]:
% matplotlib inline

import time
import pandas as pd

from IPython import display as d

import matplotlib.pyplot as plt
import seaborn as sns
# sns.set()

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import RFE, RFECV
from sklearn import feature_selection as fs

from libs import container
from libs.experiment import (
    WithAnotherExperiment, KFoldExperiment, roc, 
    discretize_classes, clean_features)

## 1. Load Data

In [2]:
# Wall-clock reference taken before loading (not read again in the visible cells).
start = time.time()

# Read the stored sample; the result is indexed by tile name (e.g. "b220").
path = "/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/"
data = container.read(path)

# Tile b220 is excluded from everything that follows.
print("Removing b220...")
del data["b220"]

# Derived features: difference between the J amplitude and the H / default-band amplitudes.
# NOTE(review): "Amplitude" is presumably the Ks-band amplitude -- confirm.
for tile_df in data.values():
    amp_j = tile_df["AmplitudeJ"]
    tile_df["AmplitudeJH"] = amp_j - tile_df["AmplitudeH"]
    tile_df["AmplitudeJK"] = amp_j - tile_df["Amplitude"]

Loading '/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/b261.npy'...
Loading '/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/b263.npy'...
Loading '/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/b220.npy'...
Loading '/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/b264.npy'...
Loading '/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/b262.npy'...
Loading '/home/data/carpyncho/stored/samples/smp_2500_rrVSunk/b278.npy'...
Removing b220...


## 2. Preprocess

### 2.1. Discretize the classes

In [3]:
# `discretize_classes` returns the updated data plus two name -> integer-code tables
# (full class names and simplified class names).
data, classes, sclasses = discretize_classes(data)

# Reverse lookup: integer code -> full class name.
rclasses = dict((code, name) for name, code in classes.items())

# Show both lookup tables, separated by a horizontal rule.
d.display(d.Markdown("**Classes**"), classes)
d.display(d.Markdown("----"), d.Markdown("**Simplified Classes**"), sclasses)

**Classes**

{'': 0,
 'Cep-1': 1,
 'Cep-F': 2,
 'LPV-Mira': 3,
 'LPV-OSARG': 4,
 'LPV-SRV': 5,
 'RRLyr-RRab': 6,
 'RRLyr-RRc': 7,
 'RRLyr-RRd': 8,
 'T2Cep-BLHer': 9,
 'T2Cep-RVTau': 10,
 'T2Cep-WVir': 11}

----

**Simplified Classes**

{'': 0, 'Cep': 1, 'LPV': 2, 'RRLyr': 3, 'T2Cep': 4}

In [4]:
# Inspect the reverse lookup (integer code -> class name) built from `classes`.
rclasses

{0: '',
 1: 'Cep-1',
 2: 'Cep-F',
 3: 'LPV-Mira',
 4: 'LPV-OSARG',
 5: 'LPV-SRV',
 6: 'RRLyr-RRab',
 7: 'RRLyr-RRc',
 8: 'RRLyr-RRd',
 9: 'T2Cep-BLHer',
 10: 'T2Cep-RVTau',
 11: 'T2Cep-WVir'}

In [8]:
# Tag every row with the tile it came from and add a constant `ones` column.
# `assign` broadcasts scalars, so no per-row Python list has to be built; the two
# chained calls keep the original column order (`tile` first, then `ones`).
# `list(...)` snapshots the items so rebinding `data[...]` cannot disturb the iteration.
for tile_name, tile_df in list(data.items()):
    data[tile_name] = tile_df.assign(tile=tile_name).assign(ones=1)

In [20]:
# Stack all per-tile frames into one table and replace the repeated per-tile
# row labels with a fresh 0..n-1 index.
df = (
    pd.concat(data.values())
    .reset_index(drop=True)
)

In [34]:
# Group the combined table by class label and originating tile.
groups = df.groupby(by=["ogle3_type", "tile"])

In [24]:
# Sanity check: number of distinct row labels in the combined table.
len(df.index.unique())

22661

Unnamed: 0,id,ogle3_type,cnt,Amplitude,AndersonDarling,Autocor_length,Beyond1Std,CAR_mean,CAR_sigma,CAR_tau,...,scls_j,scls_k,AmplitudeJH,AmplitudeJK,cls,real_cls,scls,real_scls,ones,tile
0,32620000299758,T2Cep-WVir,62,0.31025,1.000000,1.0,0.322581,4.007449,1.758976e-01,2.779502,...,-1,-1,0.123987,0.13865,11,11,4,4,1,b262
1,32620000243907,T2Cep-BLHer,124,0.22500,1.000000,2.0,0.282258,29.474036,-3.081963e-01,0.452656,...,-1,-1,-0.042250,-0.08300,9,9,4,4,1,b262
2,32620000410013,T2Cep-BLHer,124,0.23400,1.000000,2.0,0.362903,31.224994,-3.604839e-01,0.436031,...,-1,-1,-0.024700,-0.05960,9,9,4,4,1,b262
3,32620000644245,RRLyr-RRd,124,0.09850,0.939799,2.0,0.298387,219.379290,-2.549065e-01,0.062734,...,-1,-1,-0.288925,-0.41190,8,8,3,3,1,b262
4,32620000150949,RRLyr-RRd,124,0.08150,0.994361,2.0,0.346774,117.847367,1.540298e-01,0.118285,...,-1,-1,-0.322075,-0.45610,8,8,3,3,1,b262
5,32620000002448,RRLyr-RRab,122,0.17100,0.998126,2.0,0.311475,136.009559,-3.274306e-01,0.108351,...,-1,-1,-0.147550,-0.22340,6,6,3,3,1,b262
6,32620000002625,RRLyr-RRab,118,0.63500,1.000000,1.0,0.152542,569891.138440,8.649599e+01,0.000025,...,-1,-1,0.757250,0.98300,6,6,3,3,1,b262
7,32620000003737,LPV-OSARG,124,1.04500,1.000000,1.0,0.000000,199500.849759,1.255277e+02,0.000049,...,-1,-1,1.556750,2.04900,4,4,2,2,1,b262
8,32620000004125,LPV-OSARG,124,0.59900,1.000000,1.0,0.000000,311637.384778,7.768973e+01,0.000030,...,-1,-1,0.687050,0.88940,4,4,2,2,1,b262
9,32620000004421,LPV-OSARG,61,1.26625,0.980310,1.0,0.000000,2791.880570,1.627299e+01,0.003548,...,-1,-1,1.988188,2.62425,4,4,2,2,1,b262


In [56]:
# Class label column of the current `df`.
df["ogle3_type"]


0        T2Cep-WVir
1       T2Cep-BLHer
2       T2Cep-BLHer
3         RRLyr-RRd
4         RRLyr-RRd
5        RRLyr-RRab
6        RRLyr-RRab
7         LPV-OSARG
8         LPV-OSARG
9         LPV-OSARG
10        LPV-OSARG
11        LPV-OSARG
12        LPV-OSARG
13        LPV-OSARG
14        RRLyr-RRc
15        LPV-OSARG
16        LPV-OSARG
17        LPV-OSARG
18        LPV-OSARG
19       RRLyr-RRab
20       RRLyr-RRab
21        LPV-OSARG
22        LPV-OSARG
23       RRLyr-RRab
24        LPV-OSARG
25          LPV-SRV
26        LPV-OSARG
27       RRLyr-RRab
28        LPV-OSARG
29        LPV-OSARG
           ...     
4129               
4130               
4131               
4132               
4133               
4134               
4135               
4136               
4137               
4138               
4139               
4140               
4141               
4142               
4143               
4144               
4145               
4146               
4147               


In [66]:
# Count, for every tile, how many rows carry each `ogle3_type` label.
# Result `coso`: one row per class name (sorted), one column per tile.
#
# The loop variables are deliberately NOT called `df` / `cls`: reusing `df` here
# would overwrite the concatenated `df` built earlier in the notebook with the
# last tile's frame, so later cells would silently see a single tile.
class_names = sorted(classes)

tiles = {}
for tile_name, tile_df in data.items():
    tiles[tile_name] = [
        len(tile_df[tile_df.ogle3_type == class_name])
        for class_name in class_names
    ]

coso = pd.DataFrame(tiles, index=class_names)

In [76]:
# Grand total over all classes and tiles: per-class totals first, then their sum.
coso.sum(axis=1).sum()



22661

In [10]:
from astropy.io import fits
import glob

In [22]:
# Collect the OBJECT header keyword from the first HDU of every catalogue
# file in request 106579.
catalogue_glob = "/home/data/vvvx/casu.ast.cam.ac.uk/~eglez/vistasp/requests/106579/catalogues/*.fits"

lista = []
for fits_path in glob.glob(catalogue_glob):
    with fits.open(fits_path) as hdul:
        lista.append(hdul[0].header["OBJECT"])

# Number of files read and the distinct OBJECT values found.
len(lista), set(lista)

(66, {'b410'})

In [23]:
# Collect the OBJECT header keyword from the first HDU of every catalogue
# file in request 106578.
catalogue_glob = "/home/data/vvvx/casu.ast.cam.ac.uk/~eglez/vistasp/requests/106578/catalogues/*.fits"

lista = []
for fits_path in glob.glob(catalogue_glob):
    with fits.open(fits_path) as hdul:
        lista.append(hdul[0].header["OBJECT"])

# Number of files read and the distinct OBJECT values found.
len(lista), set(lista)

(72, {'b437'})

In [4]:
# Open one catalogue from request 106578 for interactive inspection.
# NOTE(review): the handle is not closed in the visible cells; call `cat.close()` when done.
cat = fits.open(
    "/home/data/vvvx/casu.ast.cam.ac.uk/~eglez/vistasp/requests/106578/catalogues/v20160715_00481_st_cat.fits"
)

In [14]:
# First HDU of the catalogue opened above; the earlier cells read the
# "OBJECT" keyword from this HDU's header.
hdu = cat[0]

'b437'