# We propose a RadViz3D visualization method: we distribute the N points uniformly on the surface of a sphere, then obtain the formulas for the x, y, and z mappings, which are drawn with a 3D scatter plot.

# In this code you can find the steps we followed, each of them explained in detail.

In [None]:
!pip install plotly
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install sklearn

In [2]:
#Import all libraries we need
import numpy as np
import pandas as pd
import plotly.express as px
from numpy import pi, cos, sin, arccos, arange
import mpl_toolkits.mplot3d
import matplotlib.pyplot as plt

######################################################

# 1- Normalize a dataframe (column by column) using min-max normalization
def matrixNormlization(X):
    """Rescale *X* to the [0, 1] range with min-max normalization.

    For a DataFrame the minimum and maximum are taken per column; for a
    Series (or array) they are taken over all values.

    A constant column has ``max == min``; dividing by that zero range would
    produce NaN for the whole column, and those NaNs would then poison every
    row of the later ``S.dot(X)`` projection.  Such columns are therefore
    divided by 1 instead, which maps them to 0.

    Parameters
    ----------
    X : pandas.DataFrame or pandas.Series
        Numeric data to normalize.

    Returns
    -------
    Same type as *X*, with values scaled to [0, 1].
    """
    lowest = X.min()
    value_range = X.max() - lowest
    if np.ndim(value_range) == 0:
        # Series / array input: the range is a single scalar.
        if value_range == 0:
            value_range = 1
    else:
        # DataFrame input: one range per column.
        value_range = value_range.replace(0, 1)
    return (X - lowest) / value_range

######################################################

# 2- Find points on a circumference
# Inputs are the radius of the circle and the number of points; the output is a list of n (x, y) coordinate tuples,
# where n is the number of points.
def get_points(radius, number_of_points):
    """Return equally spaced points on a circle centred at the origin.

    The first point lies at angle 0 and the rest follow counter-clockwise
    in steps of ``2*pi / number_of_points`` radians.

    Parameters
    ----------
    radius : float
        Radius of the circle.
    number_of_points : int
        How many points to generate.

    Returns
    -------
    list of tuple
        ``number_of_points`` pairs ``(x, y)``.
    """
    step = 2*np.pi/number_of_points
    angles = (k*step for k in range(number_of_points))
    return [(radius*np.cos(angle), radius*np.sin(angle)) for angle in angles]

######################################################

# 3- get_Xmatrix function is designed to return the X matrix "Dimension anchors matrix"
# input is the attribute names, output is the X matrix (Dimension anchors matrix)
def get_Xmatrix(DS_names):
    """Build the 2D dimension-anchor matrix for the given attribute names.

    One anchor is placed per attribute on the unit circle (via
    ``get_points``), with coordinates rounded to 6 decimals.

    Parameters
    ----------
    DS_names : sequence of attribute names (e.g. ``DataFrame.columns``).

    Returns
    -------
    pandas.DataFrame
        Indexed by attribute name (index name cleared), with the anchor
        coordinates in columns ``0`` and ``1``.
    """
    names = pd.DataFrame(DS_names).rename(columns={0: 'DS_names'})
    # DataFrame.size of the single-column frame equals the attribute count.
    anchors = pd.DataFrame(get_points(1, names.size)).round(6)
    X = pd.concat([names, anchors], axis=1).set_index('DS_names')
    X.index.name = None
    return X

#############################################################

# 4- Radviz2DMapping function is created to get the S_hat matrix (RadViz-mapped versions of the samples)
# inputs are the S matrix (samples or records matrix) and the X matrix (dimension anchors matrix)
def Radviz2DMapping(S,X):
    """Project the samples onto the anchors: ``S_hat = (S . X) / rowsum(S)``.

    Parameters
    ----------
    S : pandas.DataFrame
        Samples matrix, one row per record and one column per attribute.
    X : pandas.DataFrame
        Dimension-anchor matrix, indexed by attribute name.

    Returns
    -------
    pandas.DataFrame
        Mapped samples, one row per record and one column per anchor
        coordinate.  NaN results (a 0/0 division, i.e. a row whose attribute
        values are all 0) are replaced by 0.
    """
    row_totals = S.sum(axis=1)            # SUMX: sum of each row vector
    projected = S.dot(X)                  # S.X
    S_hat = projected.div(row_totals, axis=0)
    S_hat = S_hat.fillna(0)               # all-zero rows divide 0 by 0
    return S_hat

##############################################################

# 5- DataframePreparation function is created to concatenate 3 matrices: the S_hat matrix "mapped version of
# the records", the X matrix "Dimension Anchors" and the Circle matrix "Circle Boundary matrix"
# inputs are the S_hat matrix (mapped samples or records matrix), the X matrix (dimension anchors matrix), and BPs, the number of
# boundary points that will be used to draw the boundaries of the RadViz space
# Output is the prepared dataframe that will be visualized

def DataframePreparation(S_hat,X,d,BPs,y=None):
    """Stack boundary points, dimension anchors and mapped samples in one frame.

    Parameters
    ----------
    S_hat : pandas.DataFrame
        Mapped samples as returned by ``Radviz2DMapping`` (columns 0 and 1).
    X : pandas.DataFrame
        Dimension-anchor matrix indexed by attribute name.
    d : int
        Number of anchors; one ``'X'`` marker label is generated per anchor.
    BPs : int
        Number of points used to draw the unit-circle boundary.
    y : pandas.Series, optional
        Label column of the samples.  It is copied and named ``'index'``
        before being attached; the caller's Series is left untouched.
        When omitted, a module-level variable named ``y`` is used, which is
        what earlier versions of this function silently relied on.

    Returns
    -------
    pandas.DataFrame
        Rows ordered as boundary points, anchors, samples.  Column
        ``'index'`` holds ``'circle'``, ``'X'`` or the sample label; column
        ``'AnchorsLabel'`` holds the attribute name for anchor rows and an
        empty string elsewhere.

    Raises
    ------
    NameError
        If *y* is not given and no module-level ``y`` exists.
    """
    if y is None:
        # Backward compatibility with callers that never passed the labels.
        try:
            y = globals()['y']
        except KeyError:
            raise NameError(
                "name 'y' is not defined; pass the label column as the 'y' argument"
            ) from None
    # rename() without inplace returns a copy, so the caller's Series keeps its name.
    samples = pd.concat([y.rename('index'), S_hat], axis=1)

    # Dimension anchors: attribute names move from the index into column 'index'.
    anchors = X.reset_index()

    # Text labels, in final row order: blanks for the boundary, the attribute
    # names for the anchors, blanks for the samples.
    AnchorsLabel = np.append(np.full(BPs, ''), anchors['index'])
    AnchorsLabel = np.append(AnchorsLabel, np.full(samples.shape[0], ''))

    # The names are now kept in AnchorsLabel; the colour label becomes 'X'.
    anchors['index'] = np.full((d), 'X')

    # Boundary of the RadViz space: BPs points on the unit circle.
    boundary = pd.DataFrame(get_points(1, BPs))
    marker = pd.DataFrame(np.full((BPs), 'circle')).rename(columns={0: 'index'})
    circle = pd.concat([marker, boundary], axis=1)

    df = pd.concat([circle, anchors, samples])
    df['AnchorsLabel'] = AnchorsLabel
    return df

####################################################################
# 6- plotRadviz2D function is created to draw the prepared dataframe using a Plotly 2D scatter plot
# input is the prepared dataframe and the output is the 2D RadViz plot
def plotRadviz2D(df):
    """Render the prepared frame as an interactive 2D scatter plot.

    Note: the ``'index'`` column of *df* is renamed to ``'label'`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with coordinate columns ``0`` and ``1``, an ``'index'`` column
        used for colouring and an ``'AnchorsLabel'`` column used as text.
    """
    df.rename(columns={'index': 'label'},inplace=True)
    fig = px.scatter(df, x=0, y=1, color='label',text='AnchorsLabel')
    fig.update_layout(
        title='paper data set',
        dragmode='select',
        width=1000,
        height=1000,
        hovermode='closest',)
    fig.show()

####################################################################
# 7- RadViz2D is designed to be the main 2D RadViz function
# It has three inputs: y (labels column), X (features dataframe) and BPs (number of boundary points)
# the output of this function is the 2D RadViz plot
# this function handles the whole process of data visualization using the introduced RadViz algorithm (2D version)
def RadViz2D(y,X,BPs):
    """Run the full 2D RadViz pipeline and show the plot.

    Parameters
    ----------
    y : pandas.Series
        Label of each record; used to colour the samples.  Not modified.
    X : pandas.DataFrame
        Feature values, one column per attribute.
    BPs : int
        Number of points used to draw the circular boundary.
    """
    S = matrixNormlization(X)  # S is the samples matrix, named as in the proof
    DS_names = S.columns
    anchors = get_Xmatrix(DS_names)  # dimension anchors (DAs) matrix
    S_hat = Radviz2DMapping(S, anchors)
    d = DS_names.size
    # Hand the labels over explicitly instead of relying on a global ``y``.
    df = DataframePreparation(S_hat, anchors, d, BPs, y=y)
    plotRadviz2D(df)
    
###################################################################

# 8- get_X3Dmatrix function is designed to return the X matrix "Dimension anchors matrix" for the 3D version
# input is the attribute names, output is the X matrix (Dimension anchors matrix)
def get_X3Dmatrix(DS_names):
    """Build the 3D dimension-anchor matrix for the given attribute names.

    One anchor is placed per attribute on the sphere (via ``get_3Dpoints``),
    with coordinates rounded to 6 decimals.

    Parameters
    ----------
    DS_names : sequence of attribute names (e.g. ``DataFrame.columns``).

    Returns
    -------
    pandas.DataFrame
        Indexed by attribute name (index name cleared), holding the anchor
        coordinates produced by ``get_3Dpoints``.
    """
    names = pd.DataFrame(DS_names).rename(columns={0: 'DS_names'})
    # DataFrame.size of the single-column frame equals the attribute count.
    anchors = pd.DataFrame(get_3Dpoints(names.size)).round(6)
    X = pd.concat([names, anchors], axis=1).set_index('DS_names')
    X.index.name = None
    return X

###################################################

# 9- Reference for the algorithm that is used to distribute the points over the sphere:
#https://stackoverflow.com/questions/9600801/evenly-distributing-n-points-on-a-sphere
#we use the golden spiral method
#input is the number of points d (the sphere has radius 1), the output is an (n x 3) coordinate matrix in dataframe format
#where n is the number of points and 3 is the three columns (x,y,z)
def get_3Dpoints(d): #get_points function over a sphere
    """Spread *d* points over the unit sphere with the golden-spiral method.

    Parameters
    ----------
    d : int
        Number of points to generate.

    Returns
    -------
    pandas.DataFrame
        *d* rows with the Cartesian coordinates in columns ``'x'``, ``'y'``
        and ``'z'``.
    """
    # Half-integer positions 0.5, 1.5, ..., d - 0.5.
    steps = arange(0, d, dtype=float) + 0.5
    polar = arccos(1 - 2*steps/d)
    azimuth = pi * (1 + 5**0.5) * steps
    # Spherical -> Cartesian conversion on the unit sphere.
    coords = dict(
        x=cos(azimuth) * sin(polar),
        y=sin(azimuth) * sin(polar),
        z=cos(polar),
    )
    return pd.DataFrame(coords)

##############################################

# 10- RadViz3D is designed to be the main 3D RadViz function
# It has three inputs: y (labels column), X (features dataframe) and BPs (number of boundary points)
# the output of this function is the 3D RadViz plot
# this function handles the whole process of data visualization using the introduced RadViz algorithm (3D version)
def RadViz3D(y,X,BPs):
    """Run the full 3D RadViz pipeline and show the plot.

    Parameters
    ----------
    y : pandas.Series
        Label of each record; used to colour the samples.  Not modified.
    X : pandas.DataFrame
        Feature values, one column per attribute.
    BPs : int
        Number of points used to draw the spherical boundary.
    """
    S = matrixNormlization(X)  # S is the samples matrix, named as in the proof
    DS_names = S.columns
    anchors = get_X3Dmatrix(DS_names)  # dimension anchors (DAs) matrix
    # The mapping S.X / rowsum(S) does not depend on the anchor dimension,
    # so the "2D" helper serves the 3D case as well.
    S_hat = Radviz2DMapping(S, anchors)
    d = DS_names.size
    # Hand the labels over explicitly instead of relying on a global ``y``.
    df = Dataframe3DPreparation(S_hat, anchors, d, BPs, y=y)
    plotRadviz3D(df)


####################################################

# 11- Dataframe3DPreparation function is created to concatenate 3 matrices:
# the S_hat matrix "mapped version of the records", the X matrix "Dimension Anchors"
# and the sphere matrix "sphere boundary matrix"
# inputs are the S_hat matrix (mapped samples or records matrix), the X matrix (dimension anchors matrix),
# and BPs, the number of boundary points used to draw the boundary of the RadViz space (10000)
# Output is the prepared dataframe that will be visualized

def Dataframe3DPreparation(S_hat,X,d,BPs,y=None):
    """Stack boundary points, dimension anchors and mapped samples in one frame.

    Parameters
    ----------
    S_hat : pandas.DataFrame
        Mapped samples as returned by ``Radviz2DMapping`` for 3D anchors.
    X : pandas.DataFrame
        Dimension-anchor matrix indexed by attribute name.
    d : int
        Number of anchors; one ``'X'`` marker label is generated per anchor.
    BPs : int
        Number of points used to draw the sphere boundary.
    y : pandas.Series, optional
        Label column of the samples.  It is copied and named ``'index'``
        before being attached; the caller's Series is left untouched.
        When omitted, a module-level variable named ``y`` is used, which is
        what earlier versions of this function silently relied on.

    Returns
    -------
    pandas.DataFrame
        Rows ordered as boundary points, anchors, samples.  Column
        ``'index'`` holds ``'sphere'``, ``'X'`` or the sample label; column
        ``'AnchorsLabel'`` holds the attribute name for anchor rows and an
        empty string elsewhere.

    Raises
    ------
    NameError
        If *y* is not given and no module-level ``y`` exists.
    """
    if y is None:
        # Backward compatibility with callers that never passed the labels.
        try:
            y = globals()['y']
        except KeyError:
            raise NameError(
                "name 'y' is not defined; pass the label column as the 'y' argument"
            ) from None
    # rename() without inplace returns a copy, so the caller's Series keeps its name.
    samples = pd.concat([y.rename('index'), S_hat], axis=1)

    # Dimension anchors: attribute names move from the index into column 'index'.
    anchors = X.reset_index()

    # Text labels, in final row order: blanks for the boundary, the attribute
    # names for the anchors, blanks for the samples.
    AnchorsLabel = np.append(np.full(BPs, ''), anchors['index'])
    AnchorsLabel = np.append(AnchorsLabel, np.full(samples.shape[0], ''))

    # The names are now kept in AnchorsLabel; the colour label becomes 'X'.
    anchors['index'] = np.full((d), 'X')

    # Boundary of the RadViz space: BPs points spread over the sphere.
    boundary = pd.DataFrame(get_3Dpoints(BPs))
    marker = pd.DataFrame(np.full((BPs), 'sphere')).rename(columns={0: 'index'})
    sphere = pd.concat([marker, boundary], axis=1)

    df = pd.concat([sphere, anchors, samples])
    df['AnchorsLabel'] = AnchorsLabel
    return df


######################################################

# 12- plotRadviz3D function is created to draw the prepared dataframe using a Plotly 3D scatter plot
# input is the prepared dataframe and the output is the 3D RadViz plot
def plotRadviz3D(df):
    """Render the prepared frame as an interactive 3D scatter plot.

    Note: the ``'index'`` column of *df* is renamed to ``'label'`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with coordinate columns ``'x'``, ``'y'`` and ``'z'``, an
        ``'index'`` column used for colouring and an ``'AnchorsLabel'``
        column used as point text.
    """
    df.rename(columns={'index': 'label'}, inplace=True)
    plot_options = {
        'x': 'x',
        'y': 'y',
        'z': 'z',
        'color': 'label',
        'text': 'AnchorsLabel',
    }
    figure = px.scatter_3d(df, **plot_options)
    figure.show()
    
    
######################################################

# 13- RadViz3DH is designed to be the main 3D RadViz function (color bar) used when y holds numerical values and not categories
# It has three inputs: y (labels column), X (features dataframe) and BPs (number of boundary points)
# the output of this function is the 3D RadViz plot
# this function handles the whole process of data visualization using the introduced RadViz algorithm (3D version)
def RadViz3DH(y,X,BPs):
    """Run the 3D RadViz pipeline for numeric labels and show the plot.

    Only the mapped samples are plotted here: no boundary sphere and no
    anchor markers are added to the frame.

    Parameters
    ----------
    y : pandas.Series
        Value of each record; used to colour the samples.  Not modified.
    X : pandas.DataFrame
        Feature values, one column per attribute.
    BPs : int
        Not used by this function; kept so the signature matches
        ``RadViz3D``.
    """
    S = matrixNormlization(X)  # S is the samples matrix, named as in the proof
    anchors = get_X3Dmatrix(S.columns)  # dimension anchors (DAs) matrix
    S_hat = Radviz2DMapping(S, anchors)
    # rename() without inplace returns a copy, so the caller's Series keeps its name.
    S_hat = pd.concat([y.rename("index"), S_hat], axis=1)
    # plotRadviz3D asks plotly for text='AnchorsLabel'; without this column
    # the call is rejected.  Samples carry no anchor text, hence the blanks.
    S_hat['AnchorsLabel'] = ''
    plotRadviz3D(S_hat)