# Generate Quasars dataset

In [29]:
from tqdm.auto import tqdm
import pandas as pd

tqdm.pandas()

# Load the dataset
file_path = 'star_classification.csv'
df = pd.read_csv(file_path, encoding='ascii')

# Filter the dataset for QSO and No-QSO
qso_df = df[df['class'] == 'QSO'].sample(n=120, random_state=1)
no_qso_df = df[df['class'] != 'QSO'].sample(n=30, random_state=1)

# Check the number of entries for each class
count_qso = qso_df.shape[0]
count_no_qso = no_qso_df.shape[0]

print('Number of QSO entries:', count_qso)
print('Number of No-QSO entries:', count_no_qso)

Number of QSO entries: 120
Number of No-QSO entries: 30


In [30]:
import os
import requests
from PIL import Image
from io import BytesIO

# Function to fetch the image from SDSS and save it
def fetch_sdss_image(row, image_type, save_dir):
    print("Row: ", row)
    # Construct the URL for the SDSS image cutout service
    url = 'http://skyserver.sdss.org/dr16/SkyServerWS/ImgCutout/getjpeg?ra=' + str(row['alpha']) + '&dec=' + str(row['delta']) + '&scale=0.2&width=64&height=64'
    response = requests.get(url)
    if response.status_code == 200:
        # Open the image and save it
        image = Image.open(BytesIO(response.content))
        obj_id = str(row['obj_ID'])
        image.save(os.path.join(save_dir, image_type + '_' + obj_id + '.jpg'))
    else:
        print('Failed to retrieve image for obj_ID ' + str(row['obj_ID']))

# Create directories for QSO and No-QSO images if they don't exist
qso_dir = 'qso_images'
no_qso_dir = 'no_qso_images'
os.makedirs(qso_dir, exist_ok=True)
os.makedirs(no_qso_dir, exist_ok=True)


no_qso_df.progress_apply(lambda row: fetch_sdss_image(row, 'No-QSO', no_qso_dir), axis=1)


# Fetch and save images for QSO
# qso_df.progress_apply(lambda row: fetch_sdss_image(row, 'QSO', qso_dir), axis=1)

# Fetch and save images for No-QSO

print('Image retrieval and saving complete.')

  0%|          | 0/30 [00:00<?, ?it/s]

Row:  obj_ID         1237670965393620736.0
alpha                      158.88627
delta                      18.212081
u                           18.53873
g                           16.71179
r                           15.88959
i                           15.50151
z                            15.1892
run_ID                          5935
rerun_ID                         301
cam_col                            4
field_ID                         282
spec_obj_ID    2919510480412239872.0
class                         GALAXY
redshift                    0.064894
plate                           2593
MJD                            54175
fiber_ID                         189
Name: 7356, dtype: object
Row:  obj_ID         1237661970116838400.0
alpha                     196.502756
delta                       6.221412
u                            25.9792
g                           22.97388
r                           20.95534
i                           19.94467
z                           19.43108


# Generate Galaxy dataset

In [4]:
from tqdm.auto import tqdm
import pandas as pd

tqdm.pandas()

# Load the dataset
file_path = 'star_classification.csv'
df = pd.read_csv(file_path, encoding='ascii')

# Filter the dataset for GALAXY and No-GALAXY
qso_df = df[df['class'] == 'GALAXY'].sample(n=60, random_state=1)
no_qso_df = df[df['class'] != 'GALAXY'].sample(n=60, random_state=1)

# Check the number of entries for each class
count_qso = qso_df.shape[0]
count_no_qso = no_qso_df.shape[0]

print('Number of GALAXY entries:', count_qso)
print('Number of No-GALAXY entries:', count_no_qso)

Number of GALAXY entries: 60
Number of No-GALAXY entries: 60


In [5]:
import os
import requests
from PIL import Image
from io import BytesIO

# Function to fetch the image from SDSS and save it
def fetch_sdss_image(row, image_type, save_dir):
    print("Row: ", row)
    # Construct the URL for the SDSS image cutout service
    url = 'http://skyserver.sdss.org/dr16/SkyServerWS/ImgCutout/getjpeg?ra=' + str(row['alpha']) + '&dec=' + str(row['delta']) + '&scale=0.2&width=64&height=64'
    response = requests.get(url)
    if response.status_code == 200:
        # Open the image and save it
        image = Image.open(BytesIO(response.content))
        obj_id = str(row['obj_ID'])
        image.save(os.path.join(save_dir, image_type + '_' + obj_id + '.jpg'))
    else:
        print('Failed to retrieve image for obj_ID ' + str(row['obj_ID']))

# Create directories for QSO and No-QSO images if they don't exist
galaxy_dir = 'galaxy_images'
no_galaxy_dir = 'no_galaxy_images'
os.makedirs(galaxy_dir, exist_ok=True)
os.makedirs(no_galaxy_dir, exist_ok=True)


no_qso_df.progress_apply(lambda row: fetch_sdss_image(row, 'No-GALAXY', no_galaxy_dir), axis=1)
# Fetch and save images for QSO


qso_df.progress_apply(lambda row: fetch_sdss_image(row, 'GALAXY', galaxy_dir), axis=1)
# Fetch and save images for No-QSO

print('Image retrieval and saving complete.')

  0%|          | 0/60 [00:00<?, ?it/s]

Row:  obj_ID         1237670457508101888.0
alpha                      44.996697
delta                       34.71337
u                             17.985
g                           16.33867
r                           15.64343
i                           15.35487
z                           15.22151
run_ID                          5817
rerun_ID                         301
cam_col                            2
field_ID                         196
spec_obj_ID    8167544392078611456.0
class                           STAR
redshift                   -0.000143
plate                           7254
MJD                            56625
fiber_ID                         969
Name: 34722, dtype: object
Row:  obj_ID         1237678825172304640.0
alpha                     327.705067
delta                       4.172409
u                           22.93113
g                           21.97658
r                            21.9972
i                           21.80574
z                           22.34481

  0%|          | 0/60 [00:00<?, ?it/s]

Row:  obj_ID         1237659324948480512.0
alpha                     229.567766
delta                      50.255307
u                           24.57674
g                           22.47833
r                           20.52957
i                           19.57433
z                           19.05979
run_ID                          3225
rerun_ID                         301
cam_col                            2
field_ID                         129
spec_obj_ID    7574076664499689472.0
class                         GALAXY
redshift                         0.0
plate                           6727
MJD                            56369
fiber_ID                         538
Name: 68459, dtype: object
Row:  obj_ID         1237663916266554112.0
alpha                     121.285066
delta                       53.30715
u                           22.37352
g                           23.45388
r                           21.67516
i                           20.63716
z                           19.90065

# Rename Images sequentially

In [7]:
import os
from tqdm.auto import tqdm

tqdm.pandas()

# Function to rename images sequentially
def rename_images_sequentially(directory):
    files = os.listdir(directory)
    for i, filename in enumerate(sorted(files), 1):
        os.rename(
            os.path.join(directory, filename),
            os.path.join(directory, f'{i + 111}.jpg')
        )

# dir1 = 'galaxy_images/'
# rename_images_sequentially(dir1)

dir2 = 'dataset/No-galaxy1/'
rename_images_sequentially(dir2)

print('Image renaming complete.')


FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'dataset/No-galaxy1/18.jpg' -> 'dataset/No-galaxy1/2.jpg'

In [9]:
from astroquery.sdss import SDSS
from astropy import coordinates as coords
import astropy.units as u

# Define a coordinate object for the center of the search
# For example, let's use the location of the Coma Cluster
coma_cluster = coords.SkyCoord(194.9531, 27.9807, unit=(u.deg, u.deg), frame='icrs')


# Define the query to select galaxies within 0.1 degrees from the Coma Cluster
query = """
SELECT TOP 5
  p.objID, p.ra, p.dec, p.u, p.g, p.r, p.i, p.z, s.class
FROM PhotoObj AS p
  JOIN dbo.fGetNearbyObjEq({ra},{dec},3) AS n ON n.objID = p.objID
  JOIN SpecObj AS s ON s.bestobjid = p.objid
WHERE
  s.class = "QSO"
""".format(ra=coma_cluster.ra.deg, dec=coma_cluster.dec.deg)

# Execute the query
results = SDSS.query_sql(query)

# Display the results
print(results)

None


In [6]:
import os
from astroquery.sdss import SDSS
from astropy.coordinates import SkyCoord
import astropy.units as u

# Create a directory for galaxy images if it doesn't exist
os.makedirs('data', exist_ok=True)

# Function to download images
def download_images(table):
    for row in table:
        coords = SkyCoord(ra=row['ra']*u.deg, dec=row['dec']*u.deg, frame='icrs')
        images = SDSS.get_images(coordinates=coords, band='g', timeout=30)
        for image in images:
            image.writeto(f'data/{row["objID"]}.fits', overwrite=True)

# Call the function to download images
print('Downloading images...')
# Using the previously obtained results
# Note: The variable 'results' should contain the result from the previous query
# If the variable is not available, we need to re-run the query
if 'results' in locals():
    download_images(results)
else:
    print('Query results not found. Please re-run the query.')
print('Download complete.')

Downloading images...
Download complete.


In [9]:
coords = SkyCoord(ra= [135.6891066]
, dec= [32.49463184]
, frame='icrs')

images = SDSS.get_images(coordinates=coords, band='g', timeout=30)
for image in images:
    image.writeto(f'data/test.fits', overwrite=True)

UnitTypeError: Angle instances require units equivalent to 'rad', but no unit was given.