# Cleaning the LAMOST DR8 Dataset / Nettoyage des données de LAMOST DR8

In [1]:
from astropy.table import Table
import pandas as pd
import numpy as np

In [3]:
# Read the two LAMOST DR8 v2 catalogues (lrs and mrs) from their FITS files
# with astropy's Table.read; both are converted to pandas further down.
lrs = Table.read('/arc/projects/k-pop/catalogues/lamost-lrs-dr8v2.fits')
mrs = Table.read('/arc/projects/k-pop/catalogues/lamost-mrs-dr8v2.fits')

Let's start with the low-resolution spectra (LRS) catalogue / On commence avec le catalogue LRS (spectres à basse résolution).

The columns of the LRS catalogue are described on this webpage / Les colonnes du catalogue LRS sont décrites sur ce site web : [LAMOST LRS General Catalogue](http://www.lamost.org/dr8/v2.0/doc/lr-data-production-description#S3.1)

Let's start by selecting only the stars and removing everything else / Commençons par sélectionner uniquement les étoiles (suppression des QSO, galaxies, etc.).

In [4]:
# Convert the LRS catalogue from an astropy Table to a pandas DataFrame.
# The guard makes this cell safe to re-run: after the first execution `lrs`
# is already a DataFrame, which has no `to_pandas` method.
if isinstance(lrs, Table):
    lrs = lrs.to_pandas()

In [41]:
# Keep only the catalogue rows whose 'class' column equals b'STAR'.
lrsstars = lrs.loc[lrs['class'].eq(b'STAR')]

In [42]:
# Show every distinct 'subclass' label present among the selected stars.
pd.unique(lrsstars['subclass'])

array([b'G7', b'K7', b'G0', b'dM4', b'F9', b'A2', b'K5', b'K3', b'A9',
       b'F6', b'F5', b'G3', b'G9', b'G6', b'G2', b'G8', b'F2', b'dM0',
       b'A6', b'G5', b'dM2', b'A7', b'WD', b'F0', b'G4', b'F7', b'dM1',
       b'K1', b'B9', b'F4', b'B6', b'K4', b'K0', b'dM5', b'A0', b'F8',
       b'K2', b'A1', b'A3', b'F3', b'Carbon', b'G1', b'sdM0', b'dM3',
       b'DoubleStar', b'A5', b'gM6', b'gM3', b'dM6', b'sdM1', b'A8',
       b'sdM4', b'dM7', b'CV', b'dM8', b'EM', b'dM9', b'sdM3', b'sdM2',
       b'gM1', b'gM8', b'gM0', b'gM7', b'gM4', b'gM5', b'O', b'gM2',
       b'gM9', b'sdM6', b'L0', b'B', b'sdM5', b'A', b'OB', b'CarbonWD',
       b'sdM7', b'G', b'F', b'dK7', b'sdM8'], dtype=object)

b'dM4', b'dM0', b'dM2', b'dM1', b'dM5', b'dM3', b'dM6', b'dM7', b'dM8', b'dM9', b'dK7' -> dwarf \
b'WD' -> white dwarf \
b'Carbon' -> carbon star \
b'sdM0', b'sdM1', b'sdM4', b'sdM3', b'sdM2', b'sdM6', b'sdM5', b'sdM7', b'sdM8' -> subdwarf \
b'DoubleStar' -> two stars \
b'gM6', b'gM3', b'gM1', b'gM8', b'gM0', b'gM7', b'gM4', b'gM5', b'gM2', b'gM9' -> giant \
b'CV' -> cataclysmic variable star \
b'EM' -> emission-line object: either a white dwarf with emission lines or a metal-rich M star with emission lines (unclear which); only 466 objects, so remove \
b'L0' -> cooler than M class, brown dwarf \
b'CarbonWD' -> carbon white dwarf

In [43]:
def _drop_subclasses(stars, excluded):
    """Return a copy of `stars` without the rows whose 'subclass' is in `excluded`.

    Parameters
    ----------
    stars : pandas.DataFrame
        Frame with a 'subclass' column.
    excluded : list
        Subclass labels to remove.

    Returns
    -------
    pandas.DataFrame
        A new frame; `stars` itself is not modified.
    """
    # One vectorised membership test instead of re-filtering the whole frame
    # once per excluded label.
    return stars[~stars['subclass'].isin(excluded)].copy()


# Subclass labels, grouped once so the three exclusion lists below are built
# from shared pieces instead of being maintained by hand separately.

# Labels carrying no d/sd/g prefix (O, B, A, F, G, K types).
normal_types = [
    b'G7', b'K7', b'G0', b'F9', b'A2', b'K5', b'K3', b'A9',
    b'F6', b'F5', b'G3', b'G9', b'G6', b'G2', b'G8', b'F2',
    b'A6', b'G5', b'A7', b'F0', b'G4', b'F7',
    b'K1', b'B9', b'F4', b'B6', b'K4', b'K0', b'A0', b'F8',
    b'K2', b'A1', b'A3', b'F3', b'G1', b'A5', b'A8',
    b'O', b'B', b'A', b'OB', b'G', b'F',
]
# 'd'-prefixed labels (dwarfs).
dwarf_types = [b'dM4', b'dM0', b'dM2', b'dM1', b'dM5', b'dM3', b'dM6',
               b'dM7', b'dM8', b'dM9', b'dK7']
# 'sd'-prefixed labels (subdwarfs).
subdwarf_types = [b'sdM0', b'sdM1', b'sdM4', b'sdM3', b'sdM2', b'sdM6',
                  b'sdM5', b'sdM7', b'sdM8']
# 'g'-prefixed labels (giants).
giant_types = [b'gM6', b'gM3', b'gM1', b'gM8', b'gM0', b'gM7', b'gM4',
               b'gM5', b'gM2', b'gM9']
# Everything else that is not a plain spectral type.
peculiar_types = [b'WD', b'Carbon', b'DoubleStar', b'CV', b'EM', b'L0',
                  b'CarbonWD']

# lrs_star: drop every dwarf, subdwarf, giant and peculiar label, leaving only
# the unprefixed spectral types.
classes = (dwarf_types + [b'WD', b'Carbon'] + subdwarf_types + [b'DoubleStar']
           + giant_types + [b'CV', b'EM', b'L0', b'CarbonWD'])
lrs_star = _drop_subclasses(lrsstars, classes)

# lrs_dwarf: drop everything except the 'd'- and 'sd'-prefixed labels, so this
# sample holds both dwarfs and subdwarfs.
classes_dwarf = normal_types + peculiar_types + giant_types
lrs_dwarf = _drop_subclasses(lrsstars, classes_dwarf)

# lrs_giant: drop everything except the 'g'-prefixed labels and b'Carbon'.
# NOTE(review): b'Carbon' is deliberately left out of this exclusion list to
# match the previous behaviour, so carbon stars end up in the giant sample -
# confirm that this is intended.
classes_giant = (normal_types
                 + [label for label in peculiar_types if label != b'Carbon']
                 + dwarf_types + subdwarf_types)
lrs_giant = _drop_subclasses(lrsstars, classes_giant)

In [49]:
# Check which 'subclass' labels remain in the giant sample after filtering.
pd.unique(lrs_giant['subclass'])

array([b'Carbon', b'gM6', b'gM3', b'gM1', b'gM8', b'gM0', b'gM7', b'gM4',
       b'gM5', b'gM2', b'gM9'], dtype=object)

In [51]:
# Display the frame left after dropping the excluded subclasses
# (the rendering is truncated to the first and last rows/columns).
lrs_star

Unnamed: 0,obsid,uid,gp_id,designation,obsdate,lmjd,mjd,planid,spid,fiberid,...,tsource,fibertype,tfrom,tcomment,offsets,offsets_v,ra,dec,fibermask,with_norm_flux
0,101001,b'G12763466510223',2676862492063809792,b'J220848.54-020324.3',b'2011-10-24',55859,55858,b'F5902',1,1,...,b'JF_LEGAS_S',b'Obj',b'-',b'.',0,0.0,332.202274,-2.056767,2,1
1,101002,b'G12763555022689',2676109292239283200,b'J220953.17-020506.0',b'2011-10-24',55859,55858,b'F5902',1,2,...,b'JF_LEGAS_S',b'Obj',b'-',b'.',0,0.0,332.471576,-2.085015,0,0
2,101005,b'G12763553667655',2676108501965287552,b'J221008.50-020659.1',b'2011-10-24',55859,55858,b'F5902',1,5,...,b'JF_LEGAS_S',b'Obj',b'-',b'.',0,0.0,332.535456,-2.116436,0,1
4,101008,b'G12763564851128',2676865000324707584,b'J220928.49-015720.7',b'2011-10-24',55859,55858,b'F5902',1,8,...,b'JF_LEGAS_S',b'Obj',b'-',b'.',0,0.0,332.368745,-1.955771,0,1
5,101009,b'G12763486062384',2676870051206263808,b'J220849.59-015207.1',b'2011-10-24',55859,55858,b'F5902',1,9,...,b'JF_LEGAS_S',b'Obj',b'-',b'.',0,0.0,332.206665,-1.868653,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10633508,824016241,b'G14527339346358',4440489931979093632,b'J162935.35+085612.3',b'2020-05-27',58997,58996,b'HD163300N063640M02',16,241,...,b'LEGUE_LCH',b'Obj',b'LCH100020',b'14875995490671237',0,0.0,247.397330,8.936756,0,0
10633510,824016245,b'G14527465042732',4440430352192704512,b'J162936.51+084403.0',b'2020-05-27',58997,58996,b'HD163300N063640M02',16,245,...,b'LAMOST',b'F-Std',b'DR7',b'SDSS',0,0.0,247.402164,8.734185,0,1
10633511,824016247,b'G14527338588203',4440439801120784896,b'J162937.03+084952.0',b'2020-05-27',58997,58996,b'HD163300N063640M02',16,247,...,b'LEGUE_LCH',b'Obj',b'LCH100020',b'14875994714320494',0,0.0,247.404323,8.831129,0,1
10633513,824016249,b'G14527462725931',4440427873992121984,b'J162921.40+083836.1',b'2020-05-27',58997,58996,b'HD163300N063640M02',16,249,...,b'LEGUE_LCH',b'Obj',b'LCH100020',b'14876121831351018',0,0.0,247.339196,8.643387,0,0


Now we need to cross-match (xmatch) the LRS catalogue with another LAMOST DR8 catalogue, so that I can see how the data look on a Kiel diagram. I did not find any release papers other than the catalogue descriptions on the website: [Release Note from LAMOST DR8](http://www.lamost.org/dr8/v2.0/doc/release-note)

Maintenant, il faut croiser (xmatch) le catalogue LRS avec un des autres catalogues de LAMOST DR8, parce que je voudrais créer un diagramme de Kiel pour visualiser l'espace que les étoiles occupent. Je n'ai pas trouvé d'articles publiés à propos de cette publication de données, à part les descriptions des catalogues sur le site web.

In [53]:
# Convert the MRS catalogue from an astropy Table to a pandas DataFrame.
# The guard makes this cell safe to re-run: after the first execution `mrs`
# is already a DataFrame, which has no `to_pandas` method.
if isinstance(mrs, Table):
    mrs = mrs.to_pandas()

[LAMOST MRS General Catalogue](http://www.lamost.org/dr8/v2.0/doc/mr-data-production-description#s3.1)

In [57]:
# List the column names available in the MRS catalogue.
mrs.columns

Index(['mobsid', 'obsid', 'uid', 'gp_id', 'designation', 'obsdate', 'lmjd',
       'mjd', 'planid', 'spid', 'fiberid', 'lmjm', 'band', 'ra_obs', 'dec_obs',
       'snr', 'gaia_source_id', 'gaia_g_mean_mag', 'gaia_bp_mean_mag',
       'gaia_rp_mean_mag', 'tsource', 'fibertype', 'tfrom', 'tcomment',
       'offsets', 'offsets_v', 'ra', 'dec', 'rv_b0', 'rv_b0_err', 'rv_b1',
       'rv_b1_err', 'rv_b_flag', 'rv_r0', 'rv_r0_err', 'rv_r1', 'rv_r1_err',
       'rv_r_flag', 'rv_br0', 'rv_br0_err', 'rv_br1', 'rv_br1_err',
       'rv_br_flag', 'rv_lasp0', 'rv_lasp0_err', 'rv_lasp1', 'rv_lasp1_err',
       'coadd', 'fibermask', 'bad_b', 'bad_r', 'moon_angle', 'lunardate',
       'moon_flg'],
      dtype='object')

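Note: no classes are given in the MRS catalogue (it has no `class` or `subclass` column), so stars cannot be selected by class here the way they were for the LRS catalogue.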