# Check Dataset

In [1]:
import pickle
import pandas as pd
import sunpy.map
import glob
import utils
from tqdm import tqdm
def pickle_dump(obj, path):
    with open(path, mode='wb') as f:
        pickle.dump(obj,f)
def pickle_load(path):
    with open(path, mode='rb') as f:
        data = pickle.load(f)
        return data

## データ数のカウント
データのインバランスが学習に与える可能性があるのでデータ内のフレアラベルがTrueのデータとFalseのデータを調べる


In [2]:
df_paths = sorted(glob.glob("../coord_dfs/*coord_df.pickle"))
keys = ["C_FLARE","M_FLARE","X_FLARE"]
for df_path in df_paths:
    coord_df = pickle_load(df_path)
    all_data = 0
    flare_data = 0
    month = df_path[-21:-15]
    print(month)
    for index,row in coord_df.iterrows():
        is_flared = [False for i in range(len(row["Polygon"]))]
        for i,cell in enumerate(is_flared):
            for key in keys:
                all_data +=1
                if row[key][i]!=0:
                    is_flared[i] = True
                    flare_data +=1
    print("all_data:{},flare_data:{}".format(all_data,flare_data))
    with open("imbalance.txt", mode="a") as f:
        f.write("{}:all_data:{},flare_data:{} imbalance:{}\n".format(month,all_data,flare_data,flare_data/all_data))



## 改めてデータセットをチェックする(2020/11/16)
そもそも回転しなくてもいい可能性がある
→全部が同じだけずれているのであればそのままで重ねてみればいいかもしれない
M_45s→179.929642
M_720s→
mharp.bitmap→180.0825



In [3]:
full_disk_path = "/Volumes/NO NAME/datasets/HMI_REGION/hmi.M_45s.20100501_000000_TAI.2.magnetogram.fits"
full_disk_map = sunpy.map.Map(full_disk_path)
# full_disk_meta = ["{}:{}".format(key,value) for key,value in full_disk_map.meta.items()]
full_disk_map.meta["crota2"]

179.929642

In [4]:
bitmap_path = "/Volumes/NO NAME/datasets/bitmap/hmi.Mharp_720s.*.20100501_000000_TAI.bitmap.fits"
bitmap_map = sunpy.map.Map(bitmap_path)
bitmap_map.meta["crota2"]
# bitmap_meta = ["{}:{}".format(key,value) for key,value in bitmap_map.meta.items()]
bitmap_map.meta["crota2"]

180.0825

In [5]:
full_disk_path = "/Volumes/NO NAME/datasets/HMI_REGION/hmi.M_720s.20100501_000000_TAI.1.magnetogram.fits"
full_disk_map = sunpy.map.Map(full_disk_path)
full_disk_map.meta["crota2"]

180.0134

In [6]:
# 画像書き出し
import cv2
cv2.imwrite("2010050100.png",full_disk_map.data)

True

In [7]:
from rasterio.features import shapes
import numpy as np
ar_polygon = shapes(bitmap_map.data.astype("int16"),mask=None,connectivity = 8)

## 複数個の場合のOverlay

In [8]:
bitmap_path = glob.glob("/Volumes/NO NAME/datasets/bitmap/201005/hmi.Mharp_720s.*.20100501_000000_TAI.bitmap.fits")
bitmap_path

['/Volumes/NO NAME/datasets/bitmap/201005/hmi.Mharp_720s.2.20100501_000000_TAI.bitmap.fits',
 '/Volumes/NO NAME/datasets/bitmap/201005/hmi.Mharp_720s.5.20100501_000000_TAI.bitmap.fits',
 '/Volumes/NO NAME/datasets/bitmap/201005/hmi.Mharp_720s.1.20100501_000000_TAI.bitmap.fits',
 '/Volumes/NO NAME/datasets/bitmap/201005/hmi.Mharp_720s.6.20100501_000000_TAI.bitmap.fits']

In [9]:
# import cv2
# from shapely.geometry import Polygon
# image = cv2.imread("2010050100.png")
# overlay=image.copy()
# int_coords = lambda x:np.array(x).round().astype(np.int32)
# alpha = 0.5
# for path in bitmap_path:
#     bitmap_map = sunpy.map.Map(path)
#     ll_x = bitmap_map.meta["crpix1"]
#     ll_y = bitmap_map.meta["crpix2"]
#     binarized_bitmap_map = np.where((bitmap_map.data==33)|(bitmap_map.data==34),1,0)
#     ar_polygon = shapes(binarized_bitmap_map.astype("int16"),mask=None,connectivity = 8)
#     for p in ar_polygon:
#         if(p[0]["coordinates"][0][0]!=(0.0,0.0)):
#             poly = [(coord[0]+ll_x,coord[1]+ll_y) for coord in p[0]["coordinates"][0]]
#             polygon = Polygon(poly)
#             exterior = [int_coords(polygon.exterior.coords)]
#             cv2.polylines(overlay,exterior,color=(255,255,0),isClosed=True,thickness = 10)
#         cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
# cv2.imwrite("2010050100_overlay.png",image)
    

## Polygonを拡張できないか試してみる
一般的なアルゴリズムは見つけられなかったので重心を見つける  
→重心との位置関係で拡張することにした

In [12]:
import cv2
from shapely.geometry import Polygon
from shapely.wkt import loads as load_wkt
image = cv2.imread("2010050100.png")
overlay=image.copy()
int_coords = lambda x:np.array(x).round().astype(np.int32)
alpha = 0.5
for path in bitmap_path:
    bitmap_map = sunpy.map.Map(path)
    ll_x = bitmap_map.meta["crpix1"]
    ll_y = bitmap_map.meta["crpix2"]
    binarized_bitmap_map = np.where((bitmap_map.data==33)|(bitmap_map.data==34),1,0)
    ar_polygon = shapes(binarized_bitmap_map.astype("int16"),mask=None,connectivity = 8)
    for p in ar_polygon:
        if(p[0]["coordinates"][0][0]!=(0.0,0.0)):
            poly = [[coord[0]+ll_x,coord[1]+ll_y] for coord in p[0]["coordinates"][0]]
            poly_txt = ["{} {}".format(coord[0]+ll_x,coord[1]+ll_y) for coord in p[0]["coordinates"][0]]
            poly_txt = ",".join(poly_txt)
            poly_txt = "POLYGON(("+poly_txt+"))" # wktの入力Formatに調整
            # polygon = Polygon(poly)
            polygon_wkt = load_wkt(poly_txt)
            centroid = polygon_wkt.centroid.wkt # 重心の導出
            c_x = centroid.split(" ")[1][1:]
            c_y = centroid.split(" ")[2][:-1]
            for point in poly:
                if point[0]< float(c_x):
                    point[0] -= 2
                else:
                    point[0] += 2
                if point[1] < float(c_y):
                    point[1] -= 2
                else:
                    point[1] += 2
            polygon = Polygon(poly)
            exterior = [int_coords(polygon.exterior.coords)]
            cv2.polylines(overlay,exterior,color=(255,255,0),isClosed=True,thickness = 10)
        cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
cv2.imwrite("2010050100_overlay_expanded.png",image)

True

In [7]:
coord_df = pickle_load("../coord_dfs/201006coord_df.pickle")


In [8]:
[flare for flare in coord_df["C_FLARE"]]

[[0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0,