In [1]:
import numpy as np
import astropy.units as u
import re
import os
import skvideo.io
import math
import sunpy.coordinates
import shutil
import pandas as pd
from datetime import date, time, datetime, timedelta
from astropy.coordinates import SkyCoord
from sunpy.coordinates import frames
from PIL import Image, ImageDraw

# Generating Disks

### Author: Alex Feghhi 
### Created: 7/30/2019
### Last run: 8/1/2019

### Goals:
- Reads in a data text file containing relevant keywords
- Outputs time-labeled disks from the given start to end dates

Generate work directory and directory to store output files

In [2]:
workdir = 'C:/Users/alexf/Desktop/HMI_Data/'
Ydata_dir =  workdir + 'Ydata/'
Ydata_cut =  workdir + 'Ydata_cut/'

if not os.path.exists(workdir):
    os.mkdir(workdir)
    print("Directory " + workdir + "does not exist. Creating...")

if not os.path.exists(Ydata_dir):
    os.mkdir(Ydata_dir)
    print("Directory " + Ydata_dir + "does not exist. Creating...")
    
if not os.path.exists(Ydata_cut):
    os.mkdir(Ydata_cut)
    print("Directory " + Ydata_cut + "does not exist. Creating...")

Open and parse data into a Pandas dataframe

Data text file generated through downloading sharp files from jsoc and extracting keywords. Jupyter notebook here: (link to Jupyter notebook)

In [3]:
data = open(workdir + 'data.txt','r')
lines = data.readlines()
header = lines[0].split()
del lines[0]
df = pd.DataFrame([line.split() for line in lines],columns=header)

Clear all nan rows

Generate a radius dictionary by finding the max number of pixels per harp, then the maximum height/width at the time, and then dividing by 2.

NPIX is number of pixels, NAXIS1 and NAXIS2 are height and width in pixels

In [4]:
max_radius_rows = df[df.groupby(['HARPNUM'])['NPIX'].transform(max) == df['NPIX']]#isolate all rows with the max npix per harp
max_radius_rows['RADIUS'] = max_radius_rows[['NAXIS1', 'NAXIS2']].max(axis=1)/2#find the max height/width then divide by 2
radius_dict = pd.Series(max_radius_rows.RADIUS.values,index=max_radius_rows.HARPNUM).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Dates selected to generate data for one year

In [9]:
start = datetime(2010, 5, 1,0,0,0)#date time object format is year, month, day, hour, minute, second
end = datetime(2011,5, 1, 0,0,0)#the end time is included amongst disks generated
time_interval = timedelta(minutes = 60)

The original image size is 4096 x 4096, we downsample to 256 x 256 for compactness. Also deletes and creates directories for each image size

Explain choice of dates (small sample to test code?)

Chose time period in demo video (include link)

In [11]:
output_sizes = [256]
for size in output_sizes:
    resize_dir = Ydata_dir + str(size)
    if  os.path.exists(resize_dir):#delete any resizing directories matching the new resizes
        shutil.rmtree(resize_dir)
    os.makedirs(resize_dir)
    resize_dir_cut = Ydata_cut + str(size)
    if  os.path.exists(resize_dir_cut):#delete any resizing directories matching the new resizes
        shutil.rmtree(resize_dir_cut)
    os.makedirs(resize_dir_cut)

Code to generate disks from the start to the end time

In [12]:
number_disks = int((end-start)/time_interval + 1)
for i in range(number_disks):
    time = start + i*time_interval
    timestring = time.strftime('%Y' + '.' + '%m' + '.' + '%d' + '_' + '%X' + '_TAI')
    obstimestring = time.strftime('%Y' + '-' + '%m' + '-' + '%d')
    rows = df.loc[df['T_REC'] == timestring].reset_index()
    disk = Image.new('1', (4096, 4096), color='black') # black/white canvas, initialized to zeros
    draw = ImageDraw.Draw(disk)
    if (not(rows.empty)):
        for index, row in rows.iterrows():#iterate over rows
            hpc1 = SkyCoord(float(rows.iloc[index]['LON_FWT'])*u.deg, float(rows.iloc[index]['LAT_FWT'])*u.deg, frame=frames.HeliographicStonyhurst, obstime=obstimestring)
            hpc_out = sunpy.coordinates.Helioprojective(observer='earth', obstime=obstimestring) # arcsec
            hpc2 = hpc1.transform_to(hpc_out) # convert to arcsecond
            xc = (hpc2.Tx / u.arcsec) # stripping units
            yc = (hpc2.Ty / u.arcsec)
            xc = (xc/float(rows.iloc[index]['CDELT1'])) + float(rows.iloc[index]['IMCRPIX1']) # convert to pixel value
            yc = (yc/float(rows.iloc[index]['CDELT2'])) + float(rows.iloc[index]['IMCRPIX2'])
            radius = radius_dict[rows.iloc[index]['HARPNUM']]
            draw.ellipse((xc-radius,yc-radius,xc+radius,yc+radius), 'white')
    for size in output_sizes:
        data = np.flipud(np.array(disk.resize((size,size), Image.BILINEAR))).astype(int)#remove.astype(int) to save as boolean
        np.save(Ydata_dir + str(size) + '/' + time.strftime('%Y' + '%m' + '%d' + '_' + '%H' +'%M' + '%S') + '_' + str(size), data)
        xycorner_dist = 2048 - (math.sqrt(2) * 2048 / 2)
        data_cut = np.flipud(disk.crop((xycorner_dist,xycorner_dist,4096-xycorner_dist,4096-xycorner_dist)).resize((size,size), Image.BILINEAR)).astype(int)
        np.save(Ydata_cut + str(size) + '/' + time.strftime('%Y' + '%m' + '%d' + '_' + '%H' +'%M' + '%S') + '_' + str(size) + '_cut', data_cut)


In [13]:
#create video function with (start,end,size)
def generate_video(start,end,size):
    video_dir = Ydata_dir + str(size) + '/'
    array_filenames = os.listdir(video_dir)
    start_index = array_filenames.index(start.strftime('%Y' + '%m' + '%d' + '_' + '%H' +'%M' + '%S') + '_' + str(size) + '.npy')
    end_index = array_filenames.index(end.strftime('%Y' + '%m' + '%d' + '_' + '%H' +'%M' + '%S') + '_' + str(size) + '.npy')
    outputdata = np.zeros((end_index + 1 - start_index, size, size))
    index = 0
    for filename in array_filenames[start_index:end_index + 1]:
        outputdata[index,:,:] = (1 - np.load(video_dir + filename)) * 255
        index = index + 1
    skvideo.io.vwrite(video_dir + 'outputvideo_'+str(size) +'.mp4', outputdata)

for size in output_sizes:
    generate_video(start,end,size)

In [12]:
#create video function with (start,end,size)
def generate_video_cut(start,end,size):
    video_dir = Ydata_cut + str(size) + '/'
    array_filenames = os.listdir(video_dir)
    start_index = array_filenames.index(start.strftime('%Y' + '%m' + '%d' + '_' + '%H' +'%M' + '%S') + '_' + str(size) + '_cut.npy')
    end_index = array_filenames.index(end.strftime('%Y' + '%m' + '%d' + '_' + '%H' +'%M' + '%S') + '_' + str(size) + '_cut.npy')
    outputdata = np.zeros((end_index + 1 - start_index, size, size))
    index = 0
    for filename in array_filenames[start_index:end_index + 1]:
        outputdata[index,:,:] = (1 - np.load(video_dir + filename)) * 255
        index = index + 1
    skvideo.io.vwrite(video_dir + 'outputvideo_'+str(size) +'_cut.mp4', outputdata)

for size in output_sizes:
    generate_video_cut(start,end,size)