In [17]:
import pandas as pd
from astroquery.gaia import Gaia
import numpy as np

def generate_random_coordinates(num_stars, rng=None):
    """Draw random sky positions, expressed in degrees.

    Right ascension is sampled uniformly from [0, 360) and declination
    uniformly from [-90, 90).

    Parameters
    ----------
    num_stars : int
        Number of (ra, dec) pairs to draw.
    rng : int or numpy.random.Generator, optional
        Seed or generator used for the draw, so a run can be reproduced.
        When omitted, the global ``np.random`` state is used, exactly as
        before this parameter existed.

    Returns
    -------
    ra, dec : numpy.ndarray
        Two arrays of length ``num_stars``.
    """
    # default_rng() accepts either a seed or an existing Generator.
    sampler = np.random if rng is None else np.random.default_rng(rng)

    ra = sampler.uniform(0, 360, num_stars)
    # NOTE(review): a flat draw in declination is not uniform on the sphere
    # (it over-represents the poles). Use arcsin(uniform(-1, 1)) instead if
    # equal-area sampling of the sky is required.
    dec = sampler.uniform(-90, 90, num_stars)
    return ra, dec

def query_gaia_dr2(ra, dec, radius=0.1):
    """Cone-search the Gaia DR2 source table around one sky position.

    Parameters
    ----------
    ra, dec : float
        Centre of the search circle, interpolated into the ADQL ``CIRCLE``
        as ICRS coordinates.
    radius : float, optional
        Radius of the search circle, in the same units as ``ra``/``dec``.
        Defaults to 0.1, the value that used to be hard-coded.

    Returns
    -------
    The object returned by ``job.get_results()`` for the submitted query.
    """
    query = (
        "SELECT * FROM gaiadr2.gaia_source "
        "WHERE CONTAINS("
        "POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec),"
        f"CIRCLE('ICRS',{ra},{dec},{radius})"
        ")=1"
    )
    # NOTE(review): astroquery documents a row cap on synchronous jobs
    # (launch_job); confirm that crowded fields are not silently truncated,
    # or switch to launch_job_async if they are.
    job = Gaia.launch_job(query)
    return job.get_results()

# Number of random sky positions to query (each cone search can return many
# Gaia sources, so the final table has far more rows than this).
num_stars = 10

# Generate random coordinates
ra, dec = generate_random_coordinates(num_stars)

# Run one cone search per position and convert each result table into its own
# DataFrame, column by column.
frames = []
for star_ra, star_dec in zip(ra, dec):
    result = query_gaia_dr2(star_ra, star_dec)
    frames.append(
        pd.DataFrame({str(name): result[str(name)] for name in result.colnames})
    )

# Concatenate once at the end: growing a DataFrame with pd.concat inside the
# loop re-copies every previously collected row on each iteration.
# ignore_index=True already yields a clean 0..n-1 index, so no reset is needed.
df = pd.concat(frames, ignore_index=True)

In [18]:
# Count the rows whose photometric-variability flag equals 'NOT_AVAILABLE'.
# The comparison is vectorised, so no Python-level loop over the column is needed.
count = int((df['phot_variable_flag'] == 'NOT_AVAILABLE').sum())
count

3192

In [19]:
# Show the combined query results (the rendered output is truncated to the
# first and last rows and a subset of columns).
df

Unnamed: 0,solution_id,DESIGNATION,source_id,random_index,ref_epoch,ra,ra_error,dec,dec_error,parallax,...,e_bp_min_rp_percentile_upper,flame_flags,radius_val,radius_percentile_lower,radius_percentile_upper,lum_val,lum_percentile_lower,lum_percentile_upper,datalink_url,epoch_photometry_url
0,1635721458409799680,Gaia DR2 6229912666291516544,6229912666291516544,433386574,2015.5,220.114695,0.448994,-24.894330,0.499219,-0.713117,...,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
1,1635721458409799680,Gaia DR2 6229901812910708992,6229901812910708992,305404372,2015.5,220.304303,0.089515,-24.873730,0.103368,0.692743,...,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
2,1635721458409799680,Gaia DR2 6229900124986975232,6229900124986975232,1301369073,2015.5,220.155203,0.331923,-24.946347,0.380369,-1.003187,...,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
3,1635721458409799680,Gaia DR2 6229912735011004928,6229912735011004928,1689639812,2015.5,220.139874,0.234308,-24.879932,0.328498,1.459692,...,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
4,1635721458409799680,Gaia DR2 6229899682607123456,6229899682607123456,1683267114,2015.5,220.189334,0.049197,-24.954606,0.056622,0.867516,...,,200111.0,1.364239,1.249299,1.576502,1.190341,1.023639,1.357043,https://gea.esac.esa.int/data-server/datalink/...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,1635721458409799680,Gaia DR2 4896853949755358848,4896853949755358848,783543253,2015.5,62.636610,0.035142,-24.694181,0.049995,0.371219,...,0.3107,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
3189,1635721458409799680,Gaia DR2 4896853808019458304,4896853808019458304,1211166211,2015.5,62.620961,0.131785,-24.710995,0.178275,0.510467,...,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
3190,1635721458409799680,Gaia DR2 4896877348737185792,4896877348737185792,1453026856,2015.5,62.667914,0.210112,-24.699482,0.295139,0.472222,...,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,
3191,1635721458409799680,Gaia DR2 4896873294288058496,4896873294288058496,1221722339,2015.5,62.722493,0.030934,-24.755065,0.042637,3.607617,...,0.6152,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...,


In [8]:
# Write the combined table to 'gaia_datas.csv'; index=False leaves the
# DataFrame index out of the file.
df.to_csv('gaia_datas.csv', index=False)

In [9]:
def remove_duplicate_rows(input_csv, output_csv):
    """Copy a CSV file, dropping rows that exactly repeat an earlier row.

    Parameters
    ----------
    input_csv : path to the CSV file to read.
    output_csv : path the de-duplicated CSV is written to (without the
        DataFrame index).
    """
    deduplicated = pd.read_csv(input_csv).drop_duplicates()
    deduplicated.to_csv(output_csv, index=False)

In [10]:
# Paths for the de-duplication step; change them to suit your own files.
input_file = 'gaia_datas.csv'  # CSV to clean
output_file = 'output_data.csv'  # destination for the de-duplicated copy

remove_duplicate_rows(input_csv=input_file, output_csv=output_file)