# Numpy和Pandas库练习

In [133]:
import numpy as np

def add(x, y):
  """Add two values, using NumPy's element-wise addition for arrays.

  Parameters
  ----------
  x, y: python number or numpy.ndarray
    The operands.

  Returns
  -------
  z: python number or numpy.ndarray
    ``x + y`` for plain Python numbers (so the Python result type is kept,
    e.g. ``float + int`` stays a ``float``), otherwise ``np.add(x, y)``.
  """
  # np.array is a factory function, not a type, so comparing type(x) against
  # it can never match; np.ndarray is the actual array class.
  if isinstance(x, np.ndarray) or isinstance(y, np.ndarray):
    return np.add(x, y)
  return x + y

## Numpy库：
$$
\hat{x}_i = \frac{x_i}{\sqrt{\sum_{j=1}^{n} x_j^2}}
$$

In [134]:
import math


def norm(matrix, axis = 0):
  """Scale the vectors along one axis of a matrix to unit Euclidean length.

  Every entry is divided by the square root of the sum of squares taken
  along ``axis``.

  For example, for the input np.array([[1,1,1,1],[2,2,2,2]]) and axis = 1,
  the output is np.array([[0.5,0.5,0.5,0.5],[0.5,0.5,0.5,0.5]]).

  Parameters
  ----------
  matrix: numpy.array
    The input matrix of two dimensions.
  axis: int, default 0
    Axis along which the sum of squares is taken: 0 normalizes each column,
    1 normalizes each row.

  Returns
  -------
  matrix: numpy.array
    The normalized matrix, as a new float array with the input's shape.
    A vector made only of zeros has length 0 and is not special-cased, so
    its entries come out as NaN.
  """
  values = np.asarray(matrix, dtype=float)
  # keepdims keeps the reduced axis with size 1 so the lengths broadcast
  # back against the input, whichever axis was chosen.
  lengths = np.sqrt(np.sum(values ** 2, axis=axis, keepdims=True))
  return values / lengths

Run the following code to get the dataset of this work.

In [135]:
"""
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/
!mkdir -p week2
%cd /content/drive/MyDrive/week2
!git clone https://github.com/dddg617/interns
%cd /content/drive/MyDrive/week2/interns/weekly work/week2/data
"""

"\nfrom google.colab import drive\ndrive.mount('/content/drive')\n%cd /content/drive/MyDrive/\n!mkdir -p week2\n%cd /content/drive/MyDrive/week2\n!git clone https://github.com/dddg617/interns\n%cd /content/drive/MyDrive/week2/interns/weekly work/week2/data\n"

## pandas库 处理 dataset.csv

In [136]:
import pandas as pd

def load_data():
   """Load 'dataset.csv', clean it, and append a row of column means.

   Steps, in the order they are applied:

   1. Read the dataset from 'dataset.csv'.
   2. Strip every "_" from string values, e.g. "101_" becomes "101".
   3. Treat values left empty by step 2 as missing and drop every row that
      has a missing value. Call the number of remaining rows R.
   4. Convert all columns to float and compute the mean of each column.
      The means are appended as one extra row, so the result has R+1 rows.

   Returns
   -------
   data: pandas.DataFrame
      The processed data following the instructions.
   """
   cleaned = (
      pd.read_csv('dataset.csv')
      # Remove the stray underscores from malformed values.
      .replace(r'_', '', regex=True)
      # A value that was only underscores is now an empty string: mark it missing.
      .replace('', np.nan)
      .dropna()
      .astype(float)
   )

   # One-row frame holding the per-column means, aligned on the same columns.
   mean_row = pd.DataFrame([cleaned.mean().tolist()], columns=cleaned.columns)

   # ignore_index renumbers the rows so the mean row simply follows the data.
   return pd.concat([cleaned, mean_row], ignore_index=True)

After you implement these functions, you may run the following code to check your answer.

In [137]:
# --- add ---
assert add(5,6) == 11
assert add(3.2,1.0) == 4.2
assert type(add(4., 4)) == float
np.testing.assert_allclose(add(np.array([1,2]), np.array([3,4])),
                np.array([4,6]))

# --- norm ---
# Kept under its own name so that `data` only ever refers to the DataFrame below.
sample_matrix = np.array([[2, 4, 6], [1, 3, 5], [3, 6, 9]])
normalized_data = norm(sample_matrix, axis=1)
assert np.allclose(normalized_data, [[0.26726124, 0.53452248, 0.80178373],
                    [0.16903085, 0.50709255, 0.84515425],
                    [0.26726124, 0.53452248, 0.80178373]])

# --- load_data ---
data = load_data()

print(data)
# Export the processed data
data.to_csv('data.csv', index=False)
# Rows that survive cleaning plus the appended row of column means.
assert len(data) == 328

print(data.values[-1])

# The last row holds the column means. It is checked once: the comparison
# covers the whole row, so repeating it per column added nothing and would
# have been skipped entirely for a frame without columns.
assert np.allclose(data.values[-1], [293734.71875, 1.9938838481903076, 29.373088836669922,
                    9.994159698486328, 1739.4874267578125, 794.76171875, 388.9890441894531])


     Annual_Income  Num_of_Loan  Num_of_Delayed_Payment  Changed_Credit_Limit  \
0     31633.540000     3.000000                6.000000             12.700000   
1     33446.440000     6.000000               19.000000             22.450000   
2     60938.130000     8.000000               18.000000             12.490000   
3     73057.160000     2.000000               14.000000             10.000000   
4     62848.880000     5.000000               19.000000             15.340000   
..             ...          ...                     ...                   ...   
323   81760.000000     3.000000               21.000000              4.710000   
324   18207.570000  -100.000000               18.000000              9.700000   
325   78162.720000     4.000000               22.000000              3.280000   
326   36941.920000     9.000000               16.000000              6.740000   
327  293734.696636     1.993884               29.373089              9.994159   

     Outstanding_Debt  Amou