In [65]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from astropy.io import ascii
from utils import col_names, relevant_indices, normalise_sdss_class, ellipticity, filling_factor
import matplotlib.pyplot as plt
%matplotlib inline


In [70]:
# Load the UKI823 pair catalogue and build the working frame `data`.
datasets = "../datasets/SuperCOSMOS/"
colnames_relevant = [col_names[i] for i in relevant_indices]

# `format="fast_no_header"` selects the same reader as the deprecated
# `Reader=ascii.FastNoHeader` keyword; `guess=False` disables format guessing.
uki823_df = ascii.read(
    datasets + "UKI823/sssedrpair.dat",
    guess=False,
    format="fast_no_header",
).to_pandas()

# Keep only the relevant columns and relabel them with their readable names.
# Going through `.values` discards the auto-generated column labels of the raw table.
data = pd.DataFrame(uki823_df.iloc[:, relevant_indices].values, columns=colnames_relevant)

# Derived features, computed from raw catalogue columns addressed by position.
data['Ellipticity'] = ellipticity(uki823_df.iloc[:, 16], uki823_df.iloc[:, 17])
# NOTE(review): the describe() output for this frame reports `inf` in 'Filling Factor' --
# presumably a zero denominator inside `filling_factor`; confirm and decide how to treat those rows.
data['Filling Factor'] = filling_factor(data['AREA'], uki823_df.iloc[:, 13], uki823_df.iloc[:, 14])
data = normalise_sdss_class(data)

In [71]:
# Print the column names, non-null counts and dtypes of the working frame.
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15645 entries, 0 to 15644
Data columns (total 24 columns):
AREA              15645 non-null float64
IPEAK             15645 non-null float64
COSMAG            15645 non-null float64
ISKY              15645 non-null float64
A_U               15645 non-null float64
B_U               15645 non-null float64
THETA_U           15645 non-null float64
A_I               15645 non-null float64
B_I               15645 non-null float64
THETA_I           15645 non-null float64
BLEND             15645 non-null float64
QUALITY           15645 non-null float64
N(0,1)            15645 non-null float64
PRFMAG            15645 non-null float64
C_COSMAG          15645 non-null float64
C_PRFMAG          15645 non-null float64
RA_SDSS           15645 non-null float64
DEC_SDSS          15645 non-null float64
GMAG_SDSS         15645 non-null float64
RMAG_SDSS         15645 non-null float64
IMAG_SDSS         15645 non-null float64
CLASS_SDSS        15645 non-nu

In [63]:
# Report the size of the working frame after selecting the relevant columns.
# `data.shape` is (rows, columns), unpacked straight into the two placeholders.
print("There are {} entries and {} columns in the uki823 DataFrame when we select the relevant columns"
      .format(*data.shape))


There are 15645 entries and 24 columns in the uki823 DataFramewhen we select the relevant columns


In [73]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the working frame.
data.describe()

Unnamed: 0,AREA,IPEAK,COSMAG,ISKY,A_U,B_U,THETA_U,A_I,B_I,THETA_I,...,C_COSMAG,C_PRFMAG,RA_SDSS,DEC_SDSS,GMAG_SDSS,RMAG_SDSS,IMAG_SDSS,CLASS_SDSS,Ellipticity,Filling Factor
count,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,...,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0,15645.0
mean,92.001151,13193640.0,-20152.272867,18080980.0,5423.954618,4149.977245,84.416874,4291.177181,3267.532886,81.601214,...,17.481817,17.367207,174.007529,0.048635,19.528314,18.495397,17.893314,1.628444,-0.033059,inf
std,314.990581,18171660.0,1971.753396,444152.0,3550.485593,3341.520804,57.212674,1935.854217,1881.680318,55.967697,...,1.285655,1.894035,179.404607,0.724706,2.194382,1.931732,1.773068,0.483236,0.030039,
min,8.0,1297920.0,-28893.0,17099830.0,1633.0,538.0,0.0,1551.0,524.0,0.0,...,11.154,4.781,0.000165,-1.243447,8.98526,6.618781,6.940934,1.0,-0.285714,1.471725e-06
25%,17.0,2348600.0,-21393.0,17698420.0,3434.0,2121.0,35.0,3235.0,2035.0,30.0,...,16.776,16.536,0.502112,-0.569497,18.465448,17.573668,17.055212,1.0,-0.048045,2.697026e-05
50%,37.0,3912512.0,-19703.0,18029930.0,4537.0,3286.0,79.0,4006.0,3000.0,73.0,...,17.858,18.019,0.96911,0.075373,19.967171,18.960104,18.355196,2.0,-0.026707,5.010798e-05
75%,92.0,14174210.0,-18561.0,18459080.0,6325.0,5203.0,135.0,4777.0,4040.0,134.0,...,18.471,18.748,359.492904,0.682301,21.087143,19.793398,19.087181,2.0,-0.008006,0.0001382349
max,24006.0,121054900.0,-17410.0,18995850.0,101032.0,90696.0,999.0,59866.0,55313.0,179.0,...,19.292,19.399,359.999933,1.273383,22.999374,24.80249,27.166468,2.0,0.001029,inf
