# Data Analysis over the Clusters
## Loading libraries

In [1]:
import numpy as np
import cygnus_lib as cy
import toolslib as tl
import matplotlib.pyplot as plt
import pandas as pd
from time import sleep

## font definition
%matplotlib inline
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['Bitstream Vera Sans']
plt.rcParams['font.serif'] = ['Bitstream Vera Sans']

## Variables definition

In [2]:
x_resolution = y_resolution = 2048
rescale      = 512
scale        = int(x_resolution/rescale)

## Loading data

List contain:

[0 - Run number, 1 - Image Number, 2 - Tag of the cluster, 3 - Pixel X position, 4 - Pixel Y position, 5 - Light in the pixel, 6 - Pedestal in the pixel]

In [25]:
#------------------- Loading File ------------------------------#
directory = ("./data/")          # Directory of the output file
filename  = ("ClustersTable")    # Name of the output file
extension = (".csv")             # Extension of the output file
dataout = directory + filename + extension # Full path of the output file

dt = {'Run': np.int64, 'Image': np.int64, 'Tag': np.object, 'X': np.object, 'Y': np.object, 'Light': np.object, 'Pedestal': np.object}

df = pd.read_csv(dataout,dtype=dt)

In [26]:
# Showing the table
df.head(n = 10)

Unnamed: 0,Run,Image,Tag,X,Y,Light,Pedestal
0,494,0,m,"[808, 808, 808, 808, 809, 809, 809, 809, 810, ...","[40, 41, 42, 43, 40, 41, 42, 43, 40, 41, 42, 4...","[103, 96, 98, 103, 97, 129, 99, 120, 100, 98, ...","[103, 102, 101, 104, 100, 115, 102, 104, 104, ..."
1,494,0,m,"[652, 652, 652, 652, 653, 653, 653, 653, 654, ...","[112, 113, 114, 115, 112, 113, 114, 115, 112, ...","[98, 102, 99, 104, 103, 102, 101, 98, 103, 99,...","[99, 102, 103, 102, 102, 102, 100, 102, 102, 1..."
2,494,0,m,"[404, 404, 404, 404, 405, 405, 405, 405, 406, ...","[160, 161, 162, 163, 160, 161, 162, 163, 160, ...","[96, 101, 102, 103, 100, 100, 100, 98, 104, 10...","[96, 102, 102, 102, 99, 103, 102, 99, 102, 103..."
3,494,0,m,"[1272, 1272, 1272, 1272, 1273, 1273, 1273, 127...","[488, 489, 490, 491, 488, 489, 490, 491, 488, ...","[103, 96, 100, 103, 100, 105, 100, 100, 103, 1...","[102, 103, 103, 103, 103, 103, 106, 102, 102, ..."
4,494,0,m,"[1208, 1208, 1208, 1208, 1209, 1209, 1209, 120...","[724, 725, 726, 727, 724, 725, 726, 727, 724, ...","[99, 108, 106, 102, 102, 106, 111, 100, 101, 1...","[102, 103, 102, 102, 104, 102, 110, 102, 102, ..."
5,494,0,m,"[1516, 1516, 1516, 1516, 1517, 1517, 1517, 151...","[780, 781, 782, 783, 780, 781, 782, 783, 780, ...","[103, 108, 116, 100, 113, 111, 111, 110, 109, ...","[102, 102, 105, 102, 109, 103, 102, 103, 104, ..."
6,494,0,m,"[1484, 1484, 1484, 1484, 1485, 1485, 1485, 148...","[856, 857, 858, 859, 856, 857, 858, 859, 856, ...","[109, 100, 103, 98, 104, 101, 101, 110, 114, 1...","[103, 104, 104, 107, 103, 103, 106, 104, 104, ..."
7,494,0,m,"[700, 700, 700, 700, 701, 701, 701, 701, 702, ...","[996, 997, 998, 999, 996, 997, 998, 999, 996, ...","[133, 106, 106, 101, 100, 120, 113, 103, 103, ...","[135, 103, 102, 102, 104, 110, 105, 102, 103, ..."
8,494,0,m,"[516, 516, 516, 516, 517, 517, 517, 517, 518, ...","[1060, 1061, 1062, 1063, 1060, 1061, 1062, 106...","[100, 103, 102, 111, 103, 98, 99, 101, 100, 11...","[105, 102, 104, 107, 103, 103, 99, 103, 102, 1..."
9,494,0,m,"[804, 804, 804, 804, 805, 805, 805, 805, 806, ...","[1240, 1241, 1242, 1243, 1240, 1241, 1242, 124...","[107, 102, 102, 104, 109, 109, 114, 107, 111, ...","[100, 105, 104, 103, 105, 107, 109, 103, 103, ..."


In [27]:
variables = df.iloc[:,0:3].copy()
variables.head(n = 5)

Unnamed: 0,Run,Image,Tag
0,494,0,m
1,494,0,m
2,494,0,m
3,494,0,m
4,494,0,m


## Declaration  of the new variables

In [28]:
sl = np.zeros(df.shape[0],dtype=float)
sb = np.zeros(df.shape[0],dtype=float)
sp = np.zeros(df.shape[0],dtype=float)
co = np.zeros(df.shape[0],dtype=float)

In [29]:
teste = df.X[0]

In [48]:
t = df.X[0][0]
t

'['

In [7]:
## Loop to calculate the new variables

for ind in range(0,df.shape[0]):
    sl[ind] = np.sum(df.Light[ind])
    sb[ind] = np.sum(df.Pedestal[ind])
    sp[ind] = np.size(df.X[ind])
    co[ind] = np.abs(np.corrcoef(df.X[ind],df.Y[ind])[0][1])    
    #lenY[ind]  = np.mean(np.unique(df.X[ind],return_counts=True)[1]*scale)
    #lenX[ind]  = np.mean(np.unique(df.Y[ind],return_counts=True)[1]*scale)

TypeError: cannot perform reduce with flexible type

In [None]:
# Insertion in the table
variables.insert(3,'SumLight',sl)
variables.insert(4,'SumPedestal',sb)
variables.insert(5,'SumPixels',sp)
variables.insert(6,'PhotonPPixels',(sl-sb)/sp)
variables.insert(7,'XYCorrelation',co)
variables.insert(8,'LightPPixel',sl/sp)

In [None]:
variables.head(n=5)

In [None]:
variables[(variables.Tag == 'l') & (variables.PhotonPPixels < 5)]

In [None]:
SLP = tl.getTaggedVariable(variables,'SumLight')
PPP = tl.getTaggedVariable(variables,'PhotonPPixels')
COR = tl.getTaggedVariable(variables,'XYCorrelation')
SPP = tl.getTaggedVariable(variables,'SumPixels')
LPP = tl.getTaggedVariable(variables,'LightPPixel')

In [None]:
tl.plot2hist(PPP, bins = 25, nsd = 2, nse = 6, label='Photons/Pixels', logx = False, logy = False)
tl.plot2hist(LPP, bins = 25, nsd = 2, nse = 6, label='Light/Pixels', logx = False, logy = False)
tl.plot2hist(SLP, bins = 25, nsd = 1, nse = 5, label='Light in the cluster', logx = False, logy = False)
tl.plot2hist(COR, bins = 25, nsd = 2, nse = 2, label='XYCorrelation', logx = False, logy = False)
tl.plot2hist(SPP, bins = 25, nsd = 1, nse = 5, label='SumPixels', logx = False, logy = False)

## Ploting the specified type of cluster

In [None]:
tag = 'm'
for cluN in df[df['Tag'] == tag].index:

    #cluN = 91 ## Specified Cluster

    Run = df[colhead[0]][cluN]
    Nim = df[colhead[1]][cluN]
    Xi  = df[colhead[3]][cluN]
    Yi  = df[colhead[4]][cluN]
    Lp  = df[colhead[5]][cluN]
    Lb  = df[colhead[6]][cluN]

    matrix = np.zeros([y_resolution,x_resolution],dtype=int)
    matrixb = np.zeros([y_resolution,x_resolution],dtype=int)
    
    matrix[Yi,Xi]=Lp
    matrixb[Yi,Xi]=Lb
    
    fig = plt.figure(figsize=(15,15))
    ax  = plt.gca()
    iax = ax.imshow(matrix,cmap="viridis", vmin=85,vmax=130)
    ax.set_ylim(np.min(Yi),np.max(Yi))
    ax.set_xlim(np.min(Xi),np.max(Xi))
    ax.set_title('Run %d - # of Image %d' % (Run, Nim))
    tl.colorbar(iax)
    plt.show(block=False)    
    
    key = input('Press <ret> to continue -> ')
    if key == 'stop':
        break
    plt.close

## Developing 

In [None]:
vecXY = np.array([np.array(df.X[91]),np.array(df.Y[91])]).T

newXY = rotClu(vecXY)

In [None]:
def rotClu(vector, angle = 45):
    theta = (angle/180.) * np.pi

    rotMatrix = np.array([[np.cos(theta), - np.sin(theta)], 
                             [np.sin(theta),  np.cos(theta)]])

    vectorl = np.dot(vector,rotMatrix)
    return vectorl