In [1]:
import pandas as pd
import numpy as np

Import GAIA-Dataset
===
GAIA from https://vizier.cds.unistra.fr/viz-bin/VizieR-3?-source=I/355

In [2]:
gaia = pd.read_csv("GAIA.csv")

In [3]:
gaia

Unnamed: 0,source_id,ra,dec,pseudocolour,phot_rp_mean_mag,phot_bp_mean_mag,dist,gmag,pmra,pmdec,radial_velocity
0,418551920284673408,10.127242,56.537189,1.395197,1.843503,2.986986,71.198784,1.942524,49.125645,-31.595375,
1,4357027756659697664,243.586211,-3.694968,1.372805,1.918922,3.482232,48.330590,2.016425,-45.340020,-144.432720,
2,5589311357728452608,109.285594,-37.097444,1.336192,1.908033,3.450613,198.306850,2.083237,-11.562000,6.520112,
3,4993479684438433792,6.572156,-42.307820,1.441150,2.032224,3.301706,24.927925,2.089977,176.268300,-398.872130,
4,4038055447778237312,274.406090,-36.762429,1.179356,1.915412,3.742679,42.107616,2.116495,-131.804080,-166.308930,
...,...,...,...,...,...,...,...,...,...,...,...
9995,2167693294501199360,309.570953,48.070220,,5.706972,6.836160,128.987300,6.358699,48.169006,23.854433,-27.147230
9996,969752767321089792,90.688324,47.809953,,5.906720,6.635459,27.568922,6.358743,35.560543,95.238960,-76.854250
9997,6356450820967499264,328.795558,-76.940643,,4.996496,8.486092,384.096100,6.358819,34.673294,3.875128,-16.283632
9998,3322229466284258432,92.280195,7.513513,,5.699785,6.849621,225.877990,6.358908,-1.921085,-2.928185,42.222054


Remove Stars with no Distance-Parameter
===

In [4]:
gaia = gaia[~gaia["dist"].isnull()]
gaia.count()

source_id           9923
ra                  9923
dec                 9923
pseudocolour        1394
phot_rp_mean_mag    9923
phot_bp_mean_mag    9923
dist                9923
gmag                9923
pmra                9923
pmdec               9923
radial_velocity     7659
dtype: int64

Replace Radial Velocity Values that are Null with zero
===

In [5]:
#Replace NaN-Radial-Velocities with 0
gaia["radial_velocity"] = gaia["radial_velocity"].fillna(0.0)
gaia

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gaia["radial_velocity"] = gaia["radial_velocity"].fillna(0.0)


Unnamed: 0,source_id,ra,dec,pseudocolour,phot_rp_mean_mag,phot_bp_mean_mag,dist,gmag,pmra,pmdec,radial_velocity
0,418551920284673408,10.127242,56.537189,1.395197,1.843503,2.986986,71.198784,1.942524,49.125645,-31.595375,0.000000
1,4357027756659697664,243.586211,-3.694968,1.372805,1.918922,3.482232,48.330590,2.016425,-45.340020,-144.432720,0.000000
2,5589311357728452608,109.285594,-37.097444,1.336192,1.908033,3.450613,198.306850,2.083237,-11.562000,6.520112,0.000000
3,4993479684438433792,6.572156,-42.307820,1.441150,2.032224,3.301706,24.927925,2.089977,176.268300,-398.872130,0.000000
4,4038055447778237312,274.406090,-36.762429,1.179356,1.915412,3.742679,42.107616,2.116495,-131.804080,-166.308930,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
9995,2167693294501199360,309.570953,48.070220,,5.706972,6.836160,128.987300,6.358699,48.169006,23.854433,-27.147230
9996,969752767321089792,90.688324,47.809953,,5.906720,6.635459,27.568922,6.358743,35.560543,95.238960,-76.854250
9997,6356450820967499264,328.795558,-76.940643,,4.996496,8.486092,384.096100,6.358819,34.673294,3.875128,-16.283632
9998,3322229466284258432,92.280195,7.513513,,5.699785,6.849621,225.877990,6.358908,-1.921085,-2.928185,42.222054


Rename Columns
===

In [6]:
#Rename rp and bp Parameters
gaia = gaia.rename({
    "phot_rp_mean_mag": "rp", 
    "phot_bp_mean_mag": "bp"
}, axis=1)
gaia.count()

source_id          9923
ra                 9923
dec                9923
pseudocolour       1394
rp                 9923
bp                 9923
dist               9923
gmag               9923
pmra               9923
pmdec              9923
radial_velocity    9923
dtype: int64

Calculate Cartesian Coordinates from Equatorial Coordinates
===

In [8]:
#Calculate XYZ
#r: radius, phi: right ascension, theta: declination
def rpt_to_x(r, phi, theta):
    return r * np.sin(np.radians(theta + 90)) * np.cos(np.radians(phi))

def rpt_to_y(r, phi, theta):
    return r * np.sin(np.radians(theta + 90)) * np.sin(np.radians(phi))

def rpt_to_z(r, phi, theta):
    return r * np.cos(np.radians(theta + 90))

gaia["x"] = rpt_to_x(gaia["dist"], gaia["ra"], gaia["dec"])
gaia["y"] = rpt_to_y(gaia["dist"], gaia["ra"], gaia["dec"])
gaia["z"] = rpt_to_z(gaia["dist"], gaia["ra"], gaia["dec"])

In [9]:
gaia

Unnamed: 0,source_id,ra,dec,pseudocolour,rp,bp,dist,gmag,pmra,pmdec,radial_velocity,x,y,z
0,418551920284673408,10.127242,56.537189,1.395197,1.843503,2.986986,71.198784,1.942524,49.125645,-31.595375,0.000000,38.647035,6.903045,-59.397151
1,4357027756659697664,243.586211,-3.694968,1.372805,1.918922,3.482232,48.330590,2.016425,-45.340020,-144.432720,0.000000,-21.455206,-43.195127,3.114648
2,5589311357728452608,109.285594,-37.097444,1.336192,1.908033,3.450613,198.306850,2.083237,-11.562000,6.520112,0.000000,-52.240485,149.295733,119.613221
3,4993479684438433792,6.572156,-42.307820,1.441150,2.032224,3.301706,24.927925,2.089977,176.268300,-398.872130,0.000000,18.314032,2.109987,16.779322
4,4038055447778237312,274.406090,-36.762429,1.179356,1.915412,3.742679,42.107616,2.116495,-131.804080,-166.308930,0.000000,2.591571,-33.633726,25.201341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2167693294501199360,309.570953,48.070220,,5.706972,6.836160,128.987300,6.358699,48.169006,23.854433,-27.147230,54.907049,-66.439774,-95.961951
9996,969752767321089792,90.688324,47.809953,,5.906720,6.635459,27.568922,6.358743,35.560543,95.238960,-76.854250,-0.222426,18.513728,-20.426401
9997,6356450820967499264,328.795558,-76.940643,,4.996496,8.486092,384.096100,6.358819,34.673294,3.875128,-16.283632,74.233999,-44.965575,374.162030
9998,3322229466284258432,92.280195,7.513513,,5.699785,6.849621,225.877990,6.358908,-1.921085,-2.928185,42.222054,-8.909713,223.761300,-29.535811


Export preprocessed Dataset
===

In [10]:
gaia.to_csv("GAIA_refined.csv")