In [29]:
from tqdm.auto import tqdm
import pandas as pd

tqdm.pandas()

# Load the dataset
file_path = 'star_classification.csv'
df = pd.read_csv(file_path, encoding='ascii')

# Filter the dataset for QSO and No-QSO
qso_df = df[df['class'] == 'QSO'].sample(n=120, random_state=1)
no_qso_df = df[df['class'] != 'QSO'].sample(n=30, random_state=1)

# Check the number of entries for each class
count_qso = qso_df.shape[0]
count_no_qso = no_qso_df.shape[0]

print('Number of QSO entries:', count_qso)
print('Number of No-QSO entries:', count_no_qso)

Number of QSO entries: 120
Number of No-QSO entries: 30


In [30]:
import os
import requests
from PIL import Image
from io import BytesIO

# Function to fetch the image from SDSS and save it
def fetch_sdss_image(row, image_type, save_dir):
    print("Row: ", row)
    # Construct the URL for the SDSS image cutout service
    url = 'http://skyserver.sdss.org/dr16/SkyServerWS/ImgCutout/getjpeg?ra=' + str(row['alpha']) + '&dec=' + str(row['delta']) + '&scale=0.2&width=64&height=64'
    response = requests.get(url)
    if response.status_code == 200:
        # Open the image and save it
        image = Image.open(BytesIO(response.content))
        obj_id = str(row['obj_ID'])
        image.save(os.path.join(save_dir, image_type + '_' + obj_id + '.jpg'))
    else:
        print('Failed to retrieve image for obj_ID ' + str(row['obj_ID']))

# Create directories for QSO and No-QSO images if they don't exist
qso_dir = 'qso_images'
no_qso_dir = 'no_qso_images'
os.makedirs(qso_dir, exist_ok=True)
os.makedirs(no_qso_dir, exist_ok=True)


no_qso_df.progress_apply(lambda row: fetch_sdss_image(row, 'No-QSO', no_qso_dir), axis=1)


# Fetch and save images for QSO
# qso_df.progress_apply(lambda row: fetch_sdss_image(row, 'QSO', qso_dir), axis=1)

# Fetch and save images for No-QSO

print('Image retrieval and saving complete.')

  0%|          | 0/30 [00:00<?, ?it/s]

Row:  obj_ID         1237670965393620736.0
alpha                      158.88627
delta                      18.212081
u                           18.53873
g                           16.71179
r                           15.88959
i                           15.50151
z                            15.1892
run_ID                          5935
rerun_ID                         301
cam_col                            4
field_ID                         282
spec_obj_ID    2919510480412239872.0
class                         GALAXY
redshift                    0.064894
plate                           2593
MJD                            54175
fiber_ID                         189
Name: 7356, dtype: object
Row:  obj_ID         1237661970116838400.0
alpha                     196.502756
delta                       6.221412
u                            25.9792
g                           22.97388
r                           20.95534
i                           19.94467
z                           19.43108


In [35]:
import os
from tqdm.auto import tqdm

tqdm.pandas()

# Function to rename images sequentially
def rename_images_sequentially(directory, prefix):
    files = os.listdir(directory)
    for i, filename in enumerate(sorted(files), 1):
        os.rename(
            os.path.join(directory, filename),
            os.path.join(directory, f'{i}.jpg')
        )

# Rename QSO images
qso_dir = 'dataset/qso1/'
rename_images_sequentially(qso_dir, 'QSO')

# Rename No-QSO images
no_qso_dir = 'dataset/noqso1/'
rename_images_sequentially(no_qso_dir, 'No-QSO')

# Confirm the renaming
qso_files = sorted(os.listdir(qso_dir))
no_qso_files = sorted(os.listdir(no_qso_dir))

print('First 5 QSO image files:', qso_files[:5])
print('First 5 No-QSO image files:', no_qso_files[:5])

First 5 QSO image files: ['1.jpg', '10.jpg', '11.jpg', '12.jpg', '13.jpg']
First 5 No-QSO image files: ['1.jpg', '10.jpg', '11.jpg', '12.jpg', '13.jpg']


In [9]:
from astroquery.sdss import SDSS
from astropy import coordinates as coords
import astropy.units as u

# Define a coordinate object for the center of the search
# For example, let's use the location of the Coma Cluster
coma_cluster = coords.SkyCoord(194.9531, 27.9807, unit=(u.deg, u.deg), frame='icrs')


# Define the query to select galaxies within 0.1 degrees from the Coma Cluster
query = """
SELECT TOP 5
  p.objID, p.ra, p.dec, p.u, p.g, p.r, p.i, p.z, s.class
FROM PhotoObj AS p
  JOIN dbo.fGetNearbyObjEq({ra},{dec},3) AS n ON n.objID = p.objID
  JOIN SpecObj AS s ON s.bestobjid = p.objid
WHERE
  s.class = "QSO"
""".format(ra=coma_cluster.ra.deg, dec=coma_cluster.dec.deg)

# Execute the query
results = SDSS.query_sql(query)

# Display the results
print(results)

None


In [6]:
import os
from astroquery.sdss import SDSS
from astropy.coordinates import SkyCoord
import astropy.units as u

# Create a directory for galaxy images if it doesn't exist
os.makedirs('data', exist_ok=True)

# Function to download images
def download_images(table):
    for row in table:
        coords = SkyCoord(ra=row['ra']*u.deg, dec=row['dec']*u.deg, frame='icrs')
        images = SDSS.get_images(coordinates=coords, band='g', timeout=30)
        for image in images:
            image.writeto(f'data/{row["objID"]}.fits', overwrite=True)

# Call the function to download images
print('Downloading images...')
# Using the previously obtained results
# Note: The variable 'results' should contain the result from the previous query
# If the variable is not available, we need to re-run the query
if 'results' in locals():
    download_images(results)
else:
    print('Query results not found. Please re-run the query.')
print('Download complete.')

Downloading images...
Download complete.


In [9]:
coords = SkyCoord(ra= [135.6891066]
, dec= [32.49463184]
, frame='icrs')

images = SDSS.get_images(coordinates=coords, band='g', timeout=30)
for image in images:
    image.writeto(f'data/test.fits', overwrite=True)

UnitTypeError: Angle instances require units equivalent to 'rad', but no unit was given.